In HW2, you will simulate and analyze various bivariate random vectors (X, Y), as discussed in Chapter 6 of the textbook. Let Z1, Z2, Z3, ... be a sequence of independent random variables, each following the standard normal N(0, 1) distribution. Each pair (Xi, Yi), i = 1, 2, ..., 100, is generated according to the following:

Question A. X = 2Z1 + Z2 + 3Z3 + 2 and Y = Z1 + 2Z2 + Z3 + 2.

Solution

To set up for the simulation, this first block of code defines n, the number of random samples to draw, and then generates each (x, y) pair from the specified linear combinations of independent standard normals.

n <- 100
data1 <- matrix(data = 0, ncol = 2, nrow = n)
for (i in 1:n)
{
  z1 <- rnorm(1)
  z2 <- rnorm(1)
  z3 <- rnorm(1)

  x <- 2 * z1 + z2 + 3 * z3 + 2
  y <- z1 + 2 * z2 + z3 + 2

  data1[i, 1] <- x
  data1[i, 2] <- y
}

plot(data1[,1], data1[,2])

X and Y have a strong positive correlation.
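Before examining the sample moments below, note what the construction implies. Writing X = a'Z + 2 and Y = b'Z + 2 with a = (2, 1, 3) and b = (1, 2, 1), we get Var(X) = a'a = 14, Var(Y) = b'b = 6, and Cov(X, Y) = a'b = 7, so the theoretical correlation is 7/sqrt(14 * 6) ≈ 0.76. A quick check (this computation is an addition, not part of the original code):

a <- c(2, 1, 3)    # coefficients of (Z1, Z2, Z3) in X
b <- c(1, 2, 1)    # coefficients of (Z1, Z2, Z3) in Y
c(varX = sum(a^2), varY = sum(b^2), covXY = sum(a * b))
sum(a * b) / sqrt(sum(a^2) * sum(b^2))   # theoretical correlation, about 0.764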

library(pander)
pander(colMeans(data1))

## 2.022 1.859

pander(cov(data1))
## 15.88 7.722
## 7.722 6.082
pander(cor(data1))
##      1 0.7859
## 0.7859      1
pander(sapply(as.data.frame(data1), sd))
##    V1    V2
## 3.985 2.466
pander(var(data1))
## 15.88 7.722
## 7.722 6.082

Figure 2: In this example we cannot say that one of these measurements directly causes the other. X and Y appear to be positively associated with each other; both are driven by the shared underlying variables Z1, Z2, Z3. While we can describe X or Y individually, the chief objective of multivariate analysis is to describe the association between the two measurements.

library(MVA)
## Loading required package: HSAUR2
## Loading required package: tools
biv <- data1[,1:2]            #   Extract bivariate data

### pdf(file = "data1.pdf")
op <- par(mfrow=c(2, 3), mar = c(4, 4, 4, 0.25), cex.lab = 1.75)  

plot(biv[, 1], biv[, 2], pch = 16,        #  Naive scatterplot
     cex = 1.5, col = "blue", cex.lab = 1.75,
     xlab = "x", 
     ylab = "y")

bvbox(biv, xlab = "x", yaxt = "n", ylab = "", cex.lab = 1.75,
      pch = 16, cex = 1.5, col = "blue", method = "NOT")

library(aplpack)
nlev <- 5
colors <- heat.colors(9)[3 : (nlev + 2)]
plothulls(biv, n.hull = 5, col.hull = colors, cex.lab = 1.75, 
          xlab = "x", ylab = " ", yaxt = "n", 
          lty.hull = 1 : nlev, density = NA, col = 0, main = " ")
## [[1]]
##           x.hull     y.hull
## [1,]  5.33804504 -0.2061418
## [2,] -3.58450311 -3.9998133
## [3,] -6.10153388 -3.5966387
## [4,] -6.41008821 -2.1101181
## [5,] -6.42096751  0.5982512
## [6,]  0.09996614  4.6868159
## [7,] 12.82866712  9.0814484
## [8,] 11.03820888  4.2070272
## 
## [[2]]
##           x.hull     y.hull
##  [1,]  2.2062369 -0.8372498
##  [2,] -3.6315936 -3.1731785
##  [3,] -4.8683537 -2.5464330
##  [4,] -5.2037868 -1.1777342
##  [5,] -4.3305154 -0.2494447
##  [6,]  0.5528526  4.5938609
##  [7,]  5.5615770  6.1558317
##  [8,]  8.1003304  6.6691154
##  [9,] 11.3496015  5.6435152
## [10,]  7.4508284  2.1156051
## 
## [[3]]
##         x.hull     y.hull
## [1,]  5.548045  1.0674956
## [2,] -3.295375 -2.9628625
## [3,] -4.442771 -1.5663841
## [4,] -3.966257 -0.3896893
## [5,]  2.141447  4.8182424
## [6,]  6.400774  5.6841963
## [7,]  9.615275  5.5103152
## [8,]  7.418937  2.2123755
## 
## [[4]]
##           x.hull     y.hull
##  [1,]  3.9992289  0.3932383
##  [2,] -0.8725405 -1.7632315
##  [3,] -1.6312832 -1.9691657
##  [4,] -3.5551940 -1.5826701
##  [5,] -3.4251585 -0.4499539
##  [6,] -1.1102825  1.9990822
##  [7,]  1.6314955  4.3555032
##  [8,]  2.6662978  4.8237337
##  [9,]  4.1930464  5.1968039
## [10,]  6.9337388  5.3571865
## [11,]  8.0905374  5.0124552
## [12,]  8.6293250  4.1517296
## 
## [[5]]
##         x.hull     y.hull
## [1,]  1.441271 -0.5087437
## [2,] -1.507719 -1.6504392
## [3,] -2.173425 -1.4219041
## [4,] -2.513290 -0.3243848
## [5,]  1.454038  4.0346278
## [6,]  6.354342  5.0582116
## [7,]  7.095443  4.4977525
## [8,]  7.022404  2.9843716
points(biv, pch = 16, cex = 1.5, col = "blue")

par(op)

The positive correlation in data1 is consistent with the elongated elliptical contours plotted in Fig. 3. Correlations close to either −1 or +1 produce thin ellipses, corresponding to an almost perfect linear relationship between the two variables.

Figure 3: Bivariate normal contours
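The corcon function below evaluates, over a grid, the standard bivariate normal density with correlation ρ,

f(x, y) = exp( −(x² − 2ρxy + y²) / (2(1 − ρ²)) ) / (2π √(1 − ρ²)),

which is exactly what dmvnorm returns for mean (0, 0) and a covariance matrix with unit variances and off-diagonal entries ρ.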

corcon <- function(x, y, correl)
{
  nx <- length(x)
  ny <- length(y)
  z <- matrix(rep(0, nx * ny), nx, ny)
  for (i in 1 : nx)
  {
    for (j in 1 : ny)
    {
      z[i, j] <- dmvnorm(c(x[i], y[j]), c(0, 0),
                         matrix(c(1, correl, correl, 1), 2, 2)) 
    }
  }
  z
}

library(MVA)
library(mvtnorm)

del <- .05      # how fine the grid
lim <- 3.25     # std normals plotted on +/- lim

### pdf(file = "bivnormal.pdf")
par(mfrow = c(2, 4), mar = c(5, 0, 5, 0), cex.lab = 1.5)   # Four plots across
contour(corcon(seq(-lim, lim, del), seq(-lim, lim, del), 
               correl = -.5), col = "blue",
        xlab = expression(rho == -.5), cex.lab = 1.5,
        drawlabels = FALSE, axes = FALSE, frame = TRUE)
contour(corcon(seq(-lim, lim, del), seq(-lim, lim, del), correl = .73),
        xlab = expression(rho == .73), cex.lab = 1.5, col = "blue",
        drawlabels = FALSE, axes = FALSE, frame = TRUE)
par(op)

Figure 4 gives two other graphical methods for depicting a bivariate normal density function with correlation 0.73: a heat image drawn with image and a perspective plot drawn with persp, both in the graphics package.

Figure 4: Heat map and profile

library(MASS)
library(mvtnorm)
library(graphics)

layout(t(matrix(c(1 : 2, rep(0, 2)), 2, 2)), widths=c(1, 1))

del <- .0125      # how fine the grid
lim <- 1.25      # std normals plotted on +/- lim

image(corcon(seq( -lim, lim, del), seq(-lim, lim, del), 
             correl = 0.73), axes = FALSE) 

del <- .1        # how fine the grid
lim <- 2.7       # std normals plotted on +/- lim

persp(corcon(seq(-lim, lim, del), seq( -lim, lim, del), 
             correl = 0.73),
      axes = FALSE, xlab = " ", ylab = " ", box = FALSE, 
      col = "lightblue", shade = .05)

par(op)

Figure 5: Bivariate confidence ellipse

library(MASS)
library(MVA)

biv <- data1[, 1: 2]          #   Extract bivariate data

bivCI <- function(s, xbar, n, alpha, m)
  #  returns m (x,y) coordinates of 1-alpha joint confidence ellipse of mean
  
{
  x <- sin( 2* pi * (0 : (m - 1) )/ (m - 1))    # m points on a unit circle
  y <- cos( 2* pi * (0 : (m - 1)) / (m - 1))
  cv <-  qchisq(1 - alpha, 2)                   # chisquared critical value
  cv <- cv / n                                  # value of quadratic form
  for (i in 1 : m)
  {
    pair <- c(x[i], y[i])        # ith (x,y) pair
    q <- pair %*% solve(s, pair)  # quadratic form
    x[i] <- x[i] * sqrt(cv / q) + xbar[1]
    y[i] <- y[i] * sqrt(cv / q) + xbar[2]
  }
  return(cbind(x, y))
}
#
plot(biv, col = "red", pch = 16, cex.lab = 1.5)
lines(bivCI(var(biv), colMeans(biv), dim(biv)[1], .01, 1600), type = "l",
      col = "blue")
lines(bivCI(var(biv), colMeans(biv), dim(biv)[1], .05, 1600), 
      type = "l", col = "green", lwd = 1)
lines(colMeans(biv)[1], colMeans(biv)[2], pch = 3, cex = .8, type = "p", 
      lwd = 1)

The joint confidence contour consists of the values of μ satisfying n(x̄ − μ)′ S⁻¹ (x̄ − μ) = χ²₂(1 − α), where χ²₂(1 − α) is the 1 − α quantile of a chi-squared distribution with 2 df, x̄ is the sample mean vector, and S is the sample covariance matrix. This figure was drawn in R using the program code for the confidence ellipse of the bivariate mean.

The bivCI function returns m points on the boundary of the 1 − α joint confidence ellipse for the mean. The plot call displays the data; we then use lines to draw the 99% and 95% confidence ellipses, and the final lines statement puts a "+" sign at the center of the figure.

Figure 6: Plot data and identify extremes

plot(data1[, 1], data1[, 2], xlab = "x", 
     ylab = "y", pch = 19, col= "red", cex = 1.5)
ext <- c(3, 9, 29)      # identified extremes
text(data1[ext, 1], data1[ext, 2], 
     labels = row.names(data1)[ext], pos = c(3, 2, 3), col = "blue")

rc <- glm(data1[, 2] ~ data1[, 1])$coefficients # regression coefficients
xb <- range(data1[,1]) * c( .9, 1.1)
yb <- rc[1] + xb * rc[2]
lines(xb, yb, col = "green")
text(12, 8, labels = "regression line", col = "green")

lines(xb, xb, col = "blue")  # diagonal line
text(11, 9, labels = "equal rates line", col = "blue")

Figure 6 plots the bivariate data. Three extreme points are identified in this bivariate plot, yet none of them is extreme in X or Y alone: multivariate outliers need not be excessive in any of their marginal components. Other than these points, there is nothing to indicate a lack of fit to the bivariate normal distribution. Figure 6 also contains a 45° line for reference; points below the line have x greater than y. The sample correlation (0.79) between x and y is high.
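The indices in ext above were identified by eye from the plot. As a sketch of how such joint extremes could be found programmatically (an illustration, not how the original indices were chosen), the squared Mahalanobis distance from the sample mean ranks points by how unusual they are jointly:

md <- mahalanobis(data1, colMeans(data1), cov(data1))  # squared distances
order(md, decreasing = TRUE)[1:3]   # indices of the three most extreme points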

The parameter estimates are approximately normally distributed with the standard errors shown, so we can easily construct confidence intervals, as in the summary below.
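Note that nlm.out, used in the next block, is not created anywhere in this report. The following is a minimal sketch, assuming the five parameters (mean1, mean2, var1, var2, rho) of a bivariate normal are fit to data1 by maximum likelihood with nlm; the function name negloglik and the starting values are choices made here, not part of the original assignment.

library(mvtnorm)

# negative log-likelihood of a bivariate normal with parameters
# theta = (mean1, mean2, var1, var2, rho)
negloglik <- function(theta, dat)
{
  m <- theta[1:2]                                 # means
  v <- theta[3:4]                                 # variances
  r <- theta[5]                                   # correlation
  if (min(v) <= 0 || abs(r) >= 1) return(1e10)    # keep parameters in range
  s <- matrix(c(v[1], r * sqrt(v[1] * v[2]),
                r * sqrt(v[1] * v[2]), v[2]), 2, 2)
  -sum(dmvnorm(dat, mean = m, sigma = s, log = TRUE))
}

start <- c(colMeans(data1), diag(var(data1)), cor(data1)[1, 2])
nlm.out <- nlm(negloglik, start, dat = data1, hessian = TRUE)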

se <- sqrt(diag(solve(nlm.out$hessian))) 
q <- qnorm(.975)                      # 95% interval quantile
upper <- nlm.out$estimate + q * se    # upper 95% CI intervals
lower <- nlm.out$estimate - q * se    # lower 95% CI intervals
summary <- data.frame(              
  cbind(nlm.out$estimate, se, lower, upper),
  row.names = c("mean1", "mean2", "var1", "var2", "rho"))
colnames(summary)[1] <- "estimate"
pander(summary, digits = 3)
        estimate      se   lower  upper
mean1       2.02   0.396    1.24    2.8
mean2       1.86   0.245    1.38   2.34
var1        15.7    2.22    11.4   20.1
var2        6.02   0.852    4.35   7.69
rho        0.786  0.0382   0.711  0.861

End of Question A.

Question B. X = (Z1 + Z2 + Z3)/3 + 1 and Y = (Z4 + Z5)/2 + 2.

Solution

To set up for the simulation, this first block of code defines n, the number of random samples to draw, and then generates each (x, y) pair from averages of independent standard normals.

n <- 100
data2 <- matrix(data = 0, ncol = 2, nrow = n)
for (i in 1:n)
{
  z1 <- rnorm(1)
  z2 <- rnorm(1)
  z3 <- rnorm(1)
  z4 <- rnorm(1)
  z5 <- rnorm(1)

  x <- (z1 + z2 + z3)/3 + 1
  y <- (z4 + z5)/2 + 2

  data2[i, 1] <- x
  data2[i, 2] <- y
}

X and Y have little or no correlation.
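This is expected from the construction: X depends only on Z1, Z2, Z3 and Y only on Z4, Z5, so X and Y are independent and the theoretical correlation is exactly 0. The theoretical variances are Var(X) = 3/9 = 1/3 and Var(Y) = 2/4 = 1/2, close to the sample values below. A quick check (an addition, not part of the original code):

c(varX = 3/9, varY = 2/4, covXY = 0)   # theoretical moments; Cov = 0 by independence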

colMeans(data2) # data summary 
## [1] 0.9481753 2.0428250
plot(data2[,1], data2[,2])

cov(data2)
##            [,1]       [,2]
## [1,] 0.33709664 0.01598345
## [2,] 0.01598345 0.51722488
cor(data2)
##            [,1]       [,2]
## [1,] 1.00000000 0.03827838
## [2,] 0.03827838 1.00000000
sapply(as.data.frame(data2), sd)
##        V1        V2 
## 0.5806002 0.7191835
var(data2)
##            [,1]       [,2]
## [1,] 0.33709664 0.01598345
## [2,] 0.01598345 0.51722488

In this example we cannot say that one of these measurements directly causes the other. X and Y appear to be only weakly associated, if at all, which matches their construction from disjoint sets of Z variables.

library(MVA)

biv <- data2[,1:2]            #   Extract bivariate data

### pdf(file = "data1.pdf")
op <- par(mfrow=c(2, 3), mar = c(4, 4, 4, 0.25), cex.lab = 1.75)  

plot(biv[, 1], biv[, 2], pch = 16,        #  Naive scatterplot
     cex = 1.5, col = "blue", cex.lab = 1.75,
     xlab = "x", 
     ylab = "y")

bvbox(biv, xlab = "x", yaxt = "n", ylab = "", cex.lab = 1.75,
      pch = 16, cex = 1.5, col = "blue", method = "NOT")

library(aplpack)
nlev <- 5
colors <- heat.colors(9)[3 : (nlev + 2)]
plothulls(biv, n.hull = 5, col.hull = colors, cex.lab = 1.75, 
          xlab = "x", ylab = " ", yaxt = "n", 
          lty.hull = 1 : nlev, density = NA, col = 0, main = " ")
## [[1]]
##          x.hull     y.hull
## [1,]  2.1572167  0.8760461
## [2,]  0.8364637 -0.4546270
## [3,] -0.2521782  0.5900014
## [4,] -0.4579461  1.8601715
## [5,] -0.3448260  3.0547369
## [6,]  0.9122831  3.9175012
## [7,]  1.4591836  3.7005412
## [8,]  2.8473959  2.1315607
## 
## [[2]]
##            x.hull    y.hull
##  [1,]  2.23479887 1.6124006
##  [2,]  1.53808560 0.4898401
##  [3,]  0.52915507 0.2943929
##  [4,]  0.03629734 0.8694447
##  [5,] -0.06165071 2.6207765
##  [6,]  0.21008831 3.0550531
##  [7,]  1.29132818 3.3546665
##  [8,]  1.63568785 3.3050194
##  [9,]  2.04563136 2.6397564
## 
## [[3]]
##           x.hull    y.hull
##  [1,] 1.57404235 1.2490593
##  [2,] 0.89561294 0.8800368
##  [3,] 0.54120754 0.7908199
##  [4,] 0.03045572 1.7364729
##  [5,] 0.11093429 2.2230410
##  [6,] 0.43905785 2.6892550
##  [7,] 0.57182132 2.8591373
##  [8,] 0.66620458 2.9292239
##  [9,] 1.59105977 2.8518858
## [10,] 1.98889222 2.4231020
## [11,] 2.04706960 2.0767331
## 
## [[4]]
##          x.hull   y.hull
##  [1,] 1.7848051 1.825504
##  [2,] 1.4840362 1.282293
##  [3,] 0.9265025 1.099944
##  [4,] 0.6137788 1.057861
##  [5,] 0.3980047 1.440714
##  [6,] 0.1476203 2.184466
##  [7,] 0.4373503 2.648200
##  [8,] 0.5323940 2.757300
##  [9,] 0.6203036 2.855299
## [10,] 0.9472997 2.878213
## [11,] 1.5721129 2.752698
## [12,] 2.0010956 2.272905
## 
## [[5]]
##          x.hull   y.hull
##  [1,] 1.6805666 1.816663
##  [2,] 1.6389311 1.655520
##  [3,] 0.9350597 1.159334
##  [4,] 0.6436355 1.114808
##  [5,] 0.3828894 1.685140
##  [6,] 0.3238998 2.278160
##  [7,] 0.4514905 2.569330
##  [8,] 0.6814399 2.839732
##  [9,] 0.7119226 2.837626
## [10,] 1.5361893 2.689114
## [11,] 1.7780711 2.468717
points(biv, pch = 16, cex = 1.5, col = "blue")

par(op)

The pattern in data2 plotted below corresponds to essentially no correlation: the point cloud is roughly circular. Correlations close to either −1 or +1 would instead produce thin ellipses, corresponding to an almost perfect linear relationship between the two variables.

Bivariate normal contours

corcon <- function(x, y, correl)
{
  nx <- length(x)
  ny <- length(y)
  z <- matrix(rep(0, nx * ny), nx, ny)
  for (i in 1 : nx)
  {
    for (j in 1 : ny)
    {
      z[i, j] <- dmvnorm(c(x[i], y[j]), c(0, 0),
                         matrix(c(1, correl, correl, 1), 2, 2)) 
    }
  }
  z
}

library(MVA)
library(mvtnorm)

del <- .05      # how fine the grid
lim <- 3.25     # std normals plotted on +/- lim

### pdf(file = "bivnormal.pdf")
par(mfrow = c(2, 4), mar = c(5, 0, 5, 0), cex.lab = 1.5)   # Four plots across
contour(corcon(seq(-lim, lim, del), seq(-lim, lim, del), 
               correl = -.5), col = "blue",
        xlab = expression(rho == -.5), cex.lab = 1.5,
        drawlabels = FALSE, axes = FALSE, frame = TRUE)
contour(corcon(seq(-lim, lim, del), seq(-lim, lim, del), correl = .11),
        xlab = expression(rho == .11), cex.lab = 1.5, col = "blue",
        drawlabels = FALSE, axes = FALSE, frame = TRUE)
par(op)

Two other graphical methods for depicting a bivariate normal density function with correlation 0.11 are given below: a heat image drawn with image and a perspective plot drawn with persp, both in the graphics package.

Figure : Heat map and profile

library(MASS)
library(mvtnorm)
library(graphics)

layout(t(matrix(c(1 : 2, rep(0, 2)), 2, 2)), widths=c(1, 1))

del <- .0125      # how fine the grid
lim <- 1.25      # std normals plotted on +/- lim

image(corcon(seq( -lim, lim, del), seq(-lim, lim, del), 
             correl = 0.11), axes = FALSE) 

del <- .1        # how fine the grid
lim <- 2.7       # std normals plotted on +/- lim

persp(corcon(seq(-lim, lim, del), seq( -lim, lim, del), 
             correl = 0.11),
      axes = FALSE, xlab = " ", ylab = " ", box = FALSE, 
      col = "lightblue", shade = .05)

par(op)

Figure: Bivariate confidence ellipse

library(MASS)
library(MVA)

biv <- data2[, 1:2]           #   Extract bivariate data

bivCI <- function(s, xbar, n, alpha, m)
  #  returns m (x,y) coordinates of 1-alpha joint confidence ellipse of mean
  
{
  x <- sin( 2* pi * (0 : (m - 1) )/ (m - 1))    # m points on a unit circle
  y <- cos( 2* pi * (0 : (m - 1)) / (m - 1))
  cv <-  qchisq(1 - alpha, 2)                   # chisquared critical value
  cv <- cv / n                                  # value of quadratic form
  for (i in 1 : m)
  {
    pair <- c(x[i], y[i])        # ith (x,y) pair
    q <- pair %*% solve(s, pair)  # quadratic form
    x[i] <- x[i] * sqrt(cv / q) + xbar[1]
    y[i] <- y[i] * sqrt(cv / q) + xbar[2]
  }
  return(cbind(x, y))
}
#
plot(biv, col = "red", pch = 16, cex.lab = 1.5)
lines(bivCI(var(biv), colMeans(biv), dim(biv)[1], .01, 1600), type = "l",
      col = "blue")
lines(bivCI(var(biv), colMeans(biv), dim(biv)[1], .05, 1600), 
      type = "l", col = "green", lwd = 1)
lines(colMeans(biv)[1], colMeans(biv)[2], pch = 3, cex = .8, type = "p", 
      lwd = 1)

The joint confidence contour consists of the values of μ satisfying n(x̄ − μ)′ S⁻¹ (x̄ − μ) = χ²₂(1 − α), where χ²₂(1 − α) is the 1 − α quantile of a chi-squared distribution with 2 df, x̄ is the sample mean vector, and S is the sample covariance matrix. This figure was drawn in R using the program code for the confidence ellipse of the bivariate mean.

The bivCI function returns m points on the boundary of the 1 − α joint confidence ellipse for the mean. The plot call displays the data; we then use lines to draw the 99% and 95% confidence ellipses, and the final lines statement puts a "+" sign at the center of the figure.

Figure: Plot data and identify extremes

plot(data2[, 1], data2[, 2], xlab = "x", 
     ylab = "y", pch = 19, col= "red", cex = 1.5)
ext <- c(3, 9, 29)      # identified extremes
text(data2[ext, 1], data2[ext, 2], 
     labels = row.names(data2)[ext], pos = c(3, 2, 3), col = "blue")

rc <- glm(data2[, 2] ~ data2[, 1])$coefficients # regression coefficients
xb <- range(data2[,1]) * c( .9, 1.1)
yb <- rc[1] + xb * rc[2]
lines(xb, yb, col = "green")
text(mean(xb), mean(yb), labels = "regression line", col = "green", pos = 3)

lines(xb, xb, col = "blue")  # diagonal line
text(mean(xb), mean(xb), labels = "equal rates line", col = "blue", pos = 1)

The figure above also contains a 45° reference line; most of the y values lie above it, reflecting the larger mean of Y. The sample correlation (0.04) between x and y is near zero.

The parameter estimates are approximately normally distributed with these standard errors so we can easily construct confidence intervals as we show in the summary below.

se <- sqrt(diag(solve(nlm.out$hessian)))  # nlm.out: ML fit of (mean1, mean2, var1, var2, rho) to data2, as sketched in Question A
q <- qnorm(.975)                      # 95% interval quantile
upper <- nlm.out$estimate + q * se    # upper 95% CI intervals
lower <- nlm.out$estimate - q * se    # lower 95% CI intervals
summary <- data.frame(              
  cbind(nlm.out$estimate, se, lower, upper),
  row.names = c("mean1", "mean2", "var1", "var2", "rho"))
colnames(summary)[1] <- "estimate"
pander(summary, digits = 3)
        estimate       se   lower  upper
mean1       1.92    0.283    1.37   2.48
mean2       3.76    0.579    2.63    4.9
var1        5.89    0.822    4.28    7.5
var2        18.5     5.37    7.94     29
rho        0.982  0.00239   0.977  0.986
Question C. X = min(Z1, Z2) and Y = max(Z1, Z2).

Solution

To set up for the simulation, this first block of code defines n, the number of random samples to draw, and then generates each (x, y) pair as the minimum and maximum of two independent standard normals.

n <- 100
data3 <- matrix(data = 0, ncol = 2, nrow = n)
for (i in 1:n)
{
  z1 <- rnorm(1)
  z2 <- rnorm(1)

  x <- min(z1, z2)
  y <- max(z1, z2)

  data3[i, 1] <- x
  data3[i, 2] <- y
}

X and Y have a positive correlation.
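Here the positive association is built in, since Y = max(Z1, Z2) ≥ X = min(Z1, Z2) for every pair. Standard results for the extremes of two independent N(0, 1) variables give E(max) = 1/√π ≈ 0.564 = −E(min) and Var(min) = Var(max) = 1 − 1/π ≈ 0.682; since min·max = Z1·Z2 has mean 0, Cov(min, max) = 1/π ≈ 0.318, for a theoretical correlation of 1/(π − 1) ≈ 0.47. A quick check (an addition, not part of the original code):

c(Emax = 1/sqrt(pi), Vmax = 1 - 1/pi, CovXY = 1/pi, rho = 1/(pi - 1))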

colMeans(data3)
## [1] -0.4611657  0.5717905
plot(data3[,1], data3[,2])

cov(data3)
##           [,1]      [,2]
## [1,] 0.7686831 0.4575280
## [2,] 0.4575280 0.7685248
cor(data3)
##           [,1]      [,2]
## [1,] 1.0000000 0.5952714
## [2,] 0.5952714 1.0000000
sapply(as.data.frame(data3), sd)
##        V1        V2 
## 0.8767457 0.8766555
var(data3)
##           [,1]      [,2]
## [1,] 0.7686831 0.4575280
## [2,] 0.4575280 0.7685248

In this example the positive association is built in rather than causal in the usual sense: X and Y are the minimum and maximum of the same pair (Z1, Z2), so Y is always at least as large as X.

library(MVA)

biv <- data3[,1:2]            #   Extract bivariate data

### pdf(file = "data1.pdf")
op <- par(mfrow=c(2, 3), mar = c(4, 4, 4, 0.25), cex.lab = 1.75)  

plot(biv[, 1], biv[, 2], pch = 16,        #  Naive scatterplot
     cex = 1.5, col = "blue", cex.lab = 1.75,
     xlab = "x", 
     ylab = "y")

bvbox(biv, xlab = "x", yaxt = "n", ylab = "", cex.lab = 1.75,
      pch = 16, cex = 1.5, col = "blue", method = "NOT")

library(aplpack)
nlev <- 5
colors <- heat.colors(9)[3 : (nlev + 2)]
plothulls(biv, n.hull = 5, col.hull = colors, cex.lab = 1.75, 
          xlab = "x", ylab = " ", yaxt = "n", 
          lty.hull = 1 : nlev, density = NA, col = 0, main = " ")
## [[1]]
##            x.hull     y.hull
##  [1,]  0.09987301  0.1375563
##  [2,] -0.48890469 -0.4838052
##  [3,] -0.98933938 -0.9705738
##  [4,] -1.10837075 -1.0436124
##  [5,] -1.79391928 -1.3320975
##  [6,] -1.94247636 -1.2431054
##  [7,] -2.58923565 -0.2691091
##  [8,] -2.21292511  1.5239831
##  [9,] -0.93322695  2.1170302
## [10,]  0.73253674  2.6816003
## [11,]  1.92260172  2.1598226
## [12,]  1.07730758  1.1818560
## 
## [[2]]
##             x.hull     y.hull
##  [1,] -0.473495159 -0.4351087
##  [2,] -0.677292176 -0.5866287
##  [3,] -1.164727952 -0.9382412
##  [4,] -1.465199960 -1.1180442
##  [5,] -1.858087381 -1.0800580
##  [6,] -2.060049335 -0.1210923
##  [7,] -1.957218993  0.7829894
##  [8,] -1.617335014  1.3177952
##  [9,] -1.520959840  1.4209003
## [10,] -1.393993564  1.4657608
## [11,]  0.008250677  1.8437427
## [12,]  0.293409630  1.8737876
## [13,]  1.599046104  1.9667210
## [14,]  0.426345899  0.4920241
## 
## [[3]]
##           x.hull      y.hull
##  [1,]  0.2050720  0.27134538
##  [2,] -0.1403229 -0.07734334
##  [3,] -0.5813843 -0.40682164
##  [4,] -1.0976326 -0.79199932
##  [5,] -1.8265397 -0.62696904
##  [6,] -1.5263208  1.32770727
##  [7,] -0.1011638  1.73167505
##  [8,]  1.1363178  1.79275609
##  [9,]  1.1966870  1.57795618
## [10,]  0.8560219  1.05032656
## 
## [[4]]
##            x.hull     y.hull
##  [1,]  0.36015512  0.4629142
##  [2,] -0.45012291 -0.2995276
##  [3,] -0.85967757 -0.5247031
##  [4,] -1.49439589 -0.6766173
##  [5,] -1.62858555 -0.2843806
##  [6,] -1.42683528  0.5034350
##  [7,] -1.21889294  0.7892701
##  [8,] -0.05039292  1.7189627
##  [9,]  0.43299985  1.7116549
## [10,]  1.03028741  1.6286758
## [11,]  0.73174969  0.9257851
## 
## [[5]]
##           x.hull      y.hull
## [1,] -1.05066066 -0.51301703
## [2,] -1.40918271 -0.29864668
## [3,] -1.47185321 -0.08762518
## [4,] -0.94431555  0.93039439
## [5,]  0.06595774  1.71468978
## [6,]  0.51954637  1.51302148
## [7,]  0.38961167  0.62382965
points(biv, pch = 16, cex = 1.5, col = "blue")

par(op)

The pattern in data3 plotted below corresponds to a positive correlation. Correlations close to either −1 or +1 result in thin ellipses, corresponding to an almost perfect linear relationship between the two variables.

Bivariate normal contours

corcon <- function(x, y, correl)
{
  nx <- length(x)
  ny <- length(y)
  z <- matrix(rep(0, nx * ny), nx, ny)
  for (i in 1 : nx)
  {
    for (j in 1 : ny)
    {
      z[i, j] <- dmvnorm(c(x[i], y[j]), c(0, 0),
                         matrix(c(1, correl, correl, 1), 2, 2)) 
    }
  }
  z
}

library(MVA)
library(mvtnorm)

del <- .05      # how fine the grid
lim <- 3.25     # std normals plotted on +/- lim

### pdf(file = "bivnormal.pdf")
par(mfrow = c(2, 4), mar = c(5, 0, 5, 0), cex.lab = 1.5)   # Four plots across
contour(corcon(seq(-lim, lim, del), seq(-lim, lim, del), 
               correl = -.5), col = "blue",
        xlab = expression(rho == -.5), cex.lab = 1.5,
        drawlabels = FALSE, axes = FALSE, frame = TRUE)
contour(corcon(seq(-lim, lim, del), seq(-lim, lim, del), correl = .40),
        xlab = expression(rho == .40), cex.lab = 1.5, col = "blue",
        drawlabels = FALSE, axes = FALSE, frame = TRUE)
par(op)

Two other graphical methods for depicting a bivariate normal density function with correlation 0.40 are given below: a heat image drawn with image and a perspective plot drawn with persp, both in the graphics package.

Figure : Heat map and profile

library(MASS)
library(mvtnorm)
library(graphics)
layout(t(matrix(c(1 : 2, rep(0, 2)), 2, 2)), widths=c(1, 1))

del <- .0125      # how fine the grid
lim <- 1.25      # std normals plotted on +/- lim

image(corcon(seq( -lim, lim, del), seq(-lim, lim, del), 
             correl = 0.40), axes = FALSE) 

del <- .1        # how fine the grid
lim <- 2.7       # std normals plotted on +/- lim

persp(corcon(seq(-lim, lim, del), seq( -lim, lim, del), 
             correl = 0.40),
      axes = FALSE, xlab = " ", ylab = " ", box = FALSE, 
      col = "lightblue", shade = .05)

par(op)

Figure: Bivariate confidence ellipse

library(MASS)
library(MVA)

biv <- data3[, 1: 2]          #   Extract bivariate data

bivCI <- function(s, xbar, n, alpha, m)
  #  returns m (x,y) coordinates of 1-alpha joint confidence ellipse of mean
  
{
  x <- sin( 2* pi * (0 : (m - 1) )/ (m - 1))    # m points on a unit circle
  y <- cos( 2* pi * (0 : (m - 1)) / (m - 1))
  cv <-  qchisq(1 - alpha, 2)                   # chisquared critical value
  cv <- cv / n                                  # value of quadratic form
  for (i in 1 : m)
  {
    pair <- c(x[i], y[i])        # ith (x,y) pair
    q <- pair %*% solve(s, pair)  # quadratic form
    x[i] <- x[i] * sqrt(cv / q) + xbar[1]
    y[i] <- y[i] * sqrt(cv / q) + xbar[2]
  }
  return(cbind(x, y))
}
#
plot(biv, col = "red", pch = 16, cex.lab = 1.5)
lines(bivCI(var(biv), colMeans(biv), dim(biv)[1], .01, 1600), type = "l",
      col = "blue")
lines(bivCI(var(biv), colMeans(biv), dim(biv)[1], .05, 1600), 
      type = "l", col = "green", lwd = 1)
lines(colMeans(biv)[1], colMeans(biv)[2], pch = 3, cex = .8, type = "p", 
      lwd = 1)

The joint confidence contour consists of the values of μ satisfying n(x̄ − μ)′ S⁻¹ (x̄ − μ) = χ²₂(1 − α), where χ²₂(1 − α) is the 1 − α quantile of a chi-squared distribution with 2 df, x̄ is the sample mean vector, and S is the sample covariance matrix. This figure was drawn in R using the program code for the confidence ellipse of the bivariate mean.

The bivCI function returns m points on the boundary of the 1 − α joint confidence ellipse for the mean. The plot call displays the data; we then use lines to draw the 99% and 95% confidence ellipses, and the final lines statement puts a "+" sign at the center of the figure.

Figure: Plot data and identify extremes

plot(data3[, 1], data3[, 2], xlab = "x", 
     ylab = "y", pch = 19, col= "red", cex = 1.5)
ext <- c(3, 9, 29)      # identified extremes
text(data3[ext, 1], data3[ext, 2], 
     labels = row.names(data3)[ext], pos = c(3, 2, 3), col = "blue")

rc <- glm(data3[, 2] ~ data3[, 1])$coefficients # regression coefficients
xb <- range(data3[,1]) * c( .9, 1.1)
yb <- rc[1] + xb * rc[2]
lines(xb, yb, col = "green")
text(mean(xb), mean(yb), labels = "regression line", col = "green", pos = 3)

lines(xb, xb, col = "blue")  # diagonal line
text(mean(xb), mean(xb), labels = "equal rates line", col = "blue", pos = 1)

The figure above contains a 45° line; every point lies on or above it, since Y = max(Z1, Z2) is never smaller than X = min(Z1, Z2). The sample correlation (0.60) between x and y is moderately strong.

The parameter estimates are approximately normally distributed with the standard errors shown, so we can easily construct confidence intervals, as in the summary below.

se <- sqrt(diag(solve(nlm.out$hessian)))  # nlm.out: ML fit of (mean1, mean2, var1, var2, rho) to data3, as sketched in Question A
q <- qnorm(.975)                      # 95% interval quantile
upper <- nlm.out$estimate + q * se    # upper 95% CI intervals
lower <- nlm.out$estimate - q * se    # lower 95% CI intervals
summary <- data.frame(              
  cbind(nlm.out$estimate, se, lower, upper),
  row.names = c("mean1", "mean2", "var1", "var2", "rho"))
colnames(summary)[1] <- "estimate"
pander(summary, digits = 3)
        estimate      se   lower   upper
mean1     -0.461  0.0872  -0.632   -0.29
mean2      0.572  0.0872   0.401   0.743
var1       0.761   0.108    0.55   0.972
var2       0.761   0.108    0.55   0.972
rho        0.595  0.0645   0.469   0.722