- Load packages
library(tidyverse)
library(MASS)
- Simulate the response with the normal distribution. Also construct the design matrix.
t <- 3; # the number of treatment levels
r<- 4; # the number of replicates
n <- t*r
levels <- c("level 1", "level 2", "level 3");
fact <- gl(t,r,labels = levels)
crd <- tibble( treatment = fact )
set.seed(13579)
mu <- 500 # reference value
tau <- c(50,0,-30) # treatment effects = value wrt mu
sd <- 10 # overall sd
means <- mu + tau %>% rep(each=r) # vector of means
y<-rnorm(n, mean = means , sd = sd)
crd$response<-y
# construct the design matrix
Z <- model.matrix(~ fact-1)
X <- cbind(1, as.matrix(Z))
colnames(X)<-c("reference", "effect 1", "effect 2", "effect 3")
- Compute the two generalized inverses.
ginv1 <- ginv(t(X)%*%X)
ginv2 <- rbind(0, cbind(0, solve(t(Z)%*%Z)))
beta1 <- ginv1%*%t(X)%*%y
beta2 <- ginv2%*%t(X)%*%y
beta1
## [,1]
## [1,] 380.13679
## [2,] 159.19078
## [3,] 131.24750
## [4,] 89.69851
beta2
## [,1]
## 0.0000
## factlevel 1 539.3276
## factlevel 2 511.3843
## factlevel 3 469.8353
- Check which linear combinations are estimable
lambda1 <- c(0,1,-1,0)
lambda2 <- c(1,1,0,0)
lambda3 <- c(1,-1,0,0)
lambda4 <- c(0,1,0,0)
# you can check for estimability
qr(rbind(X,lambda1))$rank
## [1] 3
qr(rbind(X,lambda2))$rank
## [1] 3
qr(rbind(X,lambda3))$rank
## [1] 4
qr(rbind(X,lambda4))$rank
## [1] 4
- Verify the last statement is true
t(lambda1)%*%ginv1%*%t(X)%*%y
## [,1]
## [1,] 27.94328
t(lambda1)%*%ginv2%*%t(X)%*%y
## [,1]
## [1,] 27.94328
t(lambda2)%*%ginv1%*%t(X)%*%y
## [,1]
## [1,] 539.3276
t(lambda2)%*%ginv2%*%t(X)%*%y
## [,1]
## [1,] 539.3276
- Verify the equation
X%*%ginv1%*%t(X)-X%*%ginv2%*%t(X)
## 1 2 3 4 5
## 1 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 4.163336e-17
## 2 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 4.163336e-17
## 3 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 4.163336e-17
## 4 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 4.163336e-17
## 5 1.110223e-16 1.110223e-16 1.110223e-16 1.110223e-16 0.000000e+00
## 6 1.110223e-16 1.110223e-16 1.110223e-16 1.110223e-16 0.000000e+00
## 7 1.110223e-16 1.110223e-16 1.110223e-16 1.110223e-16 0.000000e+00
## 8 1.110223e-16 1.110223e-16 1.110223e-16 1.110223e-16 0.000000e+00
## 9 5.551115e-17 5.551115e-17 5.551115e-17 5.551115e-17 -1.387779e-17
## 10 5.551115e-17 5.551115e-17 5.551115e-17 5.551115e-17 -1.387779e-17
## 11 5.551115e-17 5.551115e-17 5.551115e-17 5.551115e-17 -1.387779e-17
## 12 5.551115e-17 5.551115e-17 5.551115e-17 5.551115e-17 -1.387779e-17
## 6 7 8 9 10
## 1 4.163336e-17 4.163336e-17 4.163336e-17 5.551115e-17 5.551115e-17
## 2 4.163336e-17 4.163336e-17 4.163336e-17 5.551115e-17 5.551115e-17
## 3 4.163336e-17 4.163336e-17 4.163336e-17 5.551115e-17 5.551115e-17
## 4 4.163336e-17 4.163336e-17 4.163336e-17 5.551115e-17 5.551115e-17
## 5 0.000000e+00 0.000000e+00 0.000000e+00 -1.387779e-17 -1.387779e-17
## 6 0.000000e+00 0.000000e+00 0.000000e+00 -1.387779e-17 -1.387779e-17
## 7 0.000000e+00 0.000000e+00 0.000000e+00 -1.387779e-17 -1.387779e-17
## 8 0.000000e+00 0.000000e+00 0.000000e+00 -1.387779e-17 -1.387779e-17
## 9 -1.387779e-17 -1.387779e-17 -1.387779e-17 5.551115e-17 5.551115e-17
## 10 -1.387779e-17 -1.387779e-17 -1.387779e-17 5.551115e-17 5.551115e-17
## 11 -1.387779e-17 -1.387779e-17 -1.387779e-17 5.551115e-17 5.551115e-17
## 12 -1.387779e-17 -1.387779e-17 -1.387779e-17 5.551115e-17 5.551115e-17
## 11 12
## 1 5.551115e-17 5.551115e-17
## 2 5.551115e-17 5.551115e-17
## 3 5.551115e-17 5.551115e-17
## 4 5.551115e-17 5.551115e-17
## 5 -1.387779e-17 -1.387779e-17
## 6 -1.387779e-17 -1.387779e-17
## 7 -1.387779e-17 -1.387779e-17
## 8 -1.387779e-17 -1.387779e-17
## 9 5.551115e-17 5.551115e-17
## 10 5.551115e-17 5.551115e-17
## 11 5.551115e-17 5.551115e-17
## 12 5.551115e-17 5.551115e-17
- Find a third linearly indep. combination
aux2 <- rbind(lambda1,lambda2)
qr(aux2)$rank
## [1] 2
aux3 <- rbind(lambda1,lambda2,lambda3)
qr(aux3)$rank
## [1] 3
- Verify the first one corresponds to the means model
lambda1 <- c(1,1,0,0)
lambda2 <- c(1,0,1,0)
lambda3 <- c(1,0,0,0)
Lambda=rbind(lambda1,lambda2,lambda3)
gama<-c(1,1,0,0) # arbitrary vector as long as U is non-singular
# gama<-c(1,1,0,0) # alternative 1
gama<-c(0,0,1,1) # alternative 2
U <- rbind(Lambda,gama)
qr(U)$rank
## [1] 4
W<-solve(U)
W1<-W[,1:3]
eta <- W[,4]
X%*%eta # should be zero vector
## [,1]
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
## 7 0
## 8 0
## 9 1
## 10 1
## 11 1
## 12 1
Z1 <- X%*%W1
qr(Z1)$rank # new design matrix
## [1] 3
t(W)%*%t(X)%*%X%*%W # check it diagonalises X'X
## lambda1 lambda2 lambda3 gama
## lambda1 4 0 0 0
## lambda2 0 8 -8 -4
## lambda3 0 -8 16 8
## gama 0 -4 8 4
# second reparametrisation
# uses eigenvectors to guarantee diagonalisation
E <- t(eigen(t(X)%*%X)$vectors)
Lambda=E[1:3,]
gama<-E[4,]
U<-rbind(Lambda, gama)
qr(U)$rank
## [1] 4
W<-solve(U)
W1<-W[,1:3]
X%*%eta # should be zero vector
## [,1]
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
## 7 0
## 8 0
## 9 1
## 10 1
## 11 1
## 12 1
Z2 <- X%*%W1
qr(Z2)$rank # new design matrix
## [1] 3
t(W)%*%t(X)%*%X%*%W # check it diagonalises X'X
## gama
## 1.600000e+01 1.415592e-16 -6.331015e-17 3.877810e-15
## 3.538981e-17 4.000000e+00 6.780619e-16 9.694524e-16
## -4.278674e-16 6.546001e-16 4.000000e+00 5.253632e-16
## gama 2.307555e-15 6.879111e-16 4.658665e-16 4.807121e-31
- Verify
# enough to check the vectors
d1 <- y-X%*%ginv1%*%t(X)%*%y
d2 <- y-X%*%ginv2%*%t(X)%*%y
d1-d2
## [,1]
## 1 -2.273737e-13
## 2 -2.273737e-13
## 3 -2.273737e-13
## 4 -2.273737e-13
## 5 -2.273737e-13
## 6 -2.273737e-13
## 7 -2.273737e-13
## 8 -2.273737e-13
## 9 -2.273737e-13
## 10 -2.273737e-13
## 11 -2.273737e-13
## 12 -2.273737e-13
- Compute F and compare to aov in R
SSE <- t(d1)%*%d1
d0 <- y-mean(y)
SSE0 <- t(d0)%*%d0
FF<-((n-t)/(t-1))*(SSE0-SSE)/SSE
FF
## [,1]
## [1,] 77.39836
aov1<-aov(response~treatment,crd)
summary(aov1)
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 2 9782 4891 77.4 2.14e-06 ***
## Residuals 9 569 63
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1