MA50259_Lab

Estimation in the means model 1. Load the packages

library(tidyverse)
library(Matrix)
library(MASS)

Construct a matrix Z in R

t<-3; # the number of treatment levels
r<-4; # the number of replicates
n<-t*r # total number of experimental units
levels <- c("level 1", "level 2", "level 3");
fact <- rep(levels, each = r) %>% factor()
# fact <- gl(t,r,labels=levels) # alternative code

crd <- tibble( treatment = fact )

Z <- model.matrix(~ fact-1)
Z

##    factlevel 1 factlevel 2 factlevel 3
## 1            1           0           0
## 2            1           0           0
## 3            1           0           0
## 4            1           0           0
## 5            0           1           0
## 6            0           1           0
## 7            0           1           0
## 8            0           1           0
## 9            0           0           1
## 10           0           0           1
## 11           0           0           1
## 12           0           0           1
## attr(,"assign")
## [1] 1 1 1
## attr(,"contrasts")
## attr(,"contrasts")$fact
## [1] "contr.treatment"

Verify Z has rank equal to t = 3. In this case we say that the model is full rank since the matrix Z has rank equal to the number of its columns.

rankMatrix(Z)[1]

## [1] 3

# alternative code
# qr(Z)$rank

Simulate the response values and plot them against the treatment levels

set.seed(13579)
mu<-500 # reference value
tau<-c(50, 0, -30) # treatment effects = differences wrt mu
sd<-10 # overall sd
means <- mu+tau %>% rep(each = r) # vector of means
y<-rnorm(n,mean = means, sd=sd)
crd$response<-y
y

##  [1] 537.6528 537.4717 547.4522 534.7335 510.9711 524.8874 507.7948
##  [8] 501.8838 459.7355 467.4329 477.4605 474.7122

ggplot(crd,aes(treatment,response))+geom_point()

5. Find the least squares estimate.

S<-t(Z)%*%Z
mu.hat <- solve(t(Z)%*%Z)%*%t(Z)%*%y
mu.hat

##                 [,1]
## factlevel 1 539.3276
## factlevel 2 511.3843
## factlevel 3 469.8353

Verify the entries of muhat are simply the arithmetic means of the response values at each treatment level

by_group <- group_by(crd, treatment)
means.crd<-summarize(by_group, means = mean(response))
glimpse(means.crd)

## Observations: 3
## Variables: 2
## $ treatment <fct> level 1, level 2, level 3
## $ means     <dbl> 539.3276, 511.3843, 469.8353

Verify that the entries of muhat are also given in the output of the lm command

mod.crd.means<-lm(response~treatment-1, data=crd)
summary(mod.crd.means)

## 
## Call:
## lm(formula = response ~ treatment - 1, data = crd)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.100  -3.841  -1.765   5.564  13.503 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## treatmentlevel 1  539.328      3.975   135.7 3.26e-16 ***
## treatmentlevel 2  511.384      3.975   128.7 5.26e-16 ***
## treatmentlevel 3  469.835      3.975   118.2 1.13e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.949 on 9 degrees of freedom
## Multiple R-squared:  0.9998, Adjusted R-squared:  0.9998 
## F-statistic: 1.631e+04 on 3 and 9 DF,  p-value: < 2.2e-16

Using the invariance property of MLE, find estimates of the following quantities

mu.hat[2] - mu.hat[1]

## [1] -27.94328

mu.hat[3] - mu.hat[1]

## [1] -69.49227

mu.hat[3] - mu.hat[2]

## [1] -41.54899

Estimation in the treatment effects model 9. Construct the design matrix X

X <- cbind(1, as.matrix(Z))
colnames(X)<-c("reference", "effect 1", "effect 2", "effect 3")
X

##    reference effect 1 effect 2 effect 3
## 1          1        1        0        0
## 2          1        1        0        0
## 3          1        1        0        0
## 4          1        1        0        0
## 5          1        0        1        0
## 6          1        0        1        0
## 7          1        0        1        0
## 8          1        0        1        0
## 9          1        0        0        1
## 10         1        0        0        1
## 11         1        0        0        1
## 12         1        0        0        1

Verify that X has rank t = 3. We say that this model doesn’t have full rank since the rank of X is less than its number of columns.

rankMatrix(X)[1]

## [1] 3

Try to find the inverse of X.T*X. Why doesn’t this work?

# solve(t(X)%*%X)
# Because the problem with the normal equations is that X.T*T is singular and so can't be inverted.

# The first column of X is the sum of the rest of the columns

# X and X.T*X have the same column rank.

Compute the Moore-Penrose generalized inverse of X.T*X using ginv

ginv1<-ginv(t(X)%*%X)
ginv

## function (X, tol = sqrt(.Machine$double.eps)) 
## {
##     if (length(dim(X)) > 2L || !(is.numeric(X) || is.complex(X))) 
##         stop("'X' must be a numeric or complex matrix")
##     if (!is.matrix(X)) 
##         X <- as.matrix(X)
##     Xsvd <- svd(X)
##     if (is.complex(X)) 
##         Xsvd$u <- Conj(Xsvd$u)
##     Positive <- Xsvd$d > max(tol * Xsvd$d[1L], 0)
##     if (all(Positive)) 
##         Xsvd$v %*% (1/Xsvd$d * t(Xsvd$u))
##     else if (!any(Positive)) 
##         array(0, dim(X)[2L:1L])
##     else Xsvd$v[, Positive, drop = FALSE] %*% ((1/Xsvd$d[Positive]) * 
##         t(Xsvd$u[, Positive, drop = FALSE]))
## }
## <bytecode: 0x7f95416600a8>
## <environment: namespace:MASS>

Compute the type of generalised inverse in the example

ginv2<-rbind(0,cbind(0,solve(t(Z)%*%Z)))
ginv2

##               factlevel 1 factlevel 2 factlevel 3
##             0        0.00        0.00        0.00
## factlevel 1 0        0.25        0.00        0.00
## factlevel 2 0        0.00        0.25        0.00
## factlevel 3 0        0.00        0.00        0.25

Find a solution to the normal equations
Find two solutions to the normal equations based on the two types of generalized inverses desscribed above. Verify numerically that they actually solve the normal equations.

beta1 <- ginv1%*%t(X)%*%y
beta1

##           [,1]
## [1,] 380.13679
## [2,] 159.19078
## [3,] 131.24750
## [4,]  89.69851

beta2 <- ginv2%*%t(X)%*%y
beta2

##                 [,1]
##               0.0000
## factlevel 1 539.3276
## factlevel 2 511.3843
## factlevel 3 469.8353

# now we verify that the above equations solve the normal equations
t(X)%*%X%*%beta1-t(X)%*%y

##                   [,1]
## reference 1.818989e-12
## effect 1  9.094947e-13
## effect 2  6.821210e-13
## effect 3  9.094947e-13

t(X)%*%X%*%beta1-t(X)%*%y

##                   [,1]
## reference 1.818989e-12
## effect 1  9.094947e-13
## effect 2  6.821210e-13
## effect 3  9.094947e-13

Find estimates for the given equations

beta1[3] - beta1[2]

## [1] -27.94328

beta1[4] - beta1[2]

## [1] -69.49227

beta1[4] - beta1[3]

## [1] -41.54899

beta2[3] - beta2[2]

## [1] -27.94328

beta2[4] - beta2[2]

## [1] -69.49227

beta2[4] - beta2[3]

## [1] -41.54899

Find estimates as per the question

beta1[1] + beta1[2]

## [1] 539.3276

beta1[1] + beta1[3]

## [1] 511.3843

beta1[1] + beta1[3]

## [1] 511.3843

beta2[1] + beta2[2]

## [1] 539.3276

beta2[1] + beta2[3]

## [1] 511.3843

beta2[1] + beta2[4]

## [1] 469.8353

Compute the solutions using the lm command

mod.crd.treat<-lm(response~treatment, data = crd)
summary(mod.crd.treat)

## 
## Call:
## lm(formula = response ~ treatment, data = crd)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.100  -3.841  -1.765   5.564  13.503 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       539.328      3.975 135.692 3.26e-16 ***
## treatmentlevel 2  -27.943      5.621  -4.971 0.000769 ***
## treatmentlevel 3  -69.492      5.621 -12.363 5.97e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.949 on 9 degrees of freedom
## Multiple R-squared:  0.9451, Adjusted R-squared:  0.9328 
## F-statistic:  77.4 on 2 and 9 DF,  p-value: 2.137e-06

MA50259 Lab2

Martin A Hewing

15/02/2019

MA50259 Lab2

Martin A Hewing

15/02/2019

Martin A Hewing

15/02/2019

Martin A Hewing

15/02/2019

15/02/2019

15/02/2019

MA50259 Lab2 Martin A Hewing 15/02/2019

MA50259 Lab2 Martin A Hewing 15/02/2019

Martin A Hewing 15/02/2019

Martin A Hewing 15/02/2019

15/02/2019

15/02/2019

MA50259 Lab2

Martin A Hewing

15/02/2019

MA50259 Lab2

Martin A Hewing

15/02/2019

Martin A Hewing

15/02/2019

Martin A Hewing

15/02/2019