1. Load packages
library(tidyverse)
library(MASS)
  1. Simulate the response with the normal distribution. Also construct the design matrix.
t <- 3; # the number of treatment levels
r<- 4; # the number of replicates
n <- t*r
levels <- c("level 1", "level 2", "level 3");
fact <- gl(t,r,labels = levels)
crd <- tibble( treatment = fact )
set.seed(13579)
mu <- 500 # reference value
tau <- c(50,0,-30) # treatment effects = value wrt mu
sd <- 10 # overall sd
means <- mu + tau %>% rep(each=r) # vector of means
y<-rnorm(n, mean = means , sd = sd)
crd$response<-y
# construct the design matrix
Z <- model.matrix(~ fact-1)
X <- cbind(1, as.matrix(Z))
colnames(X)<-c("reference", "effect 1", "effect 2", "effect 3")
  1. Compute the two generalized inverses.
ginv1 <- ginv(t(X)%*%X)
ginv2 <- rbind(0, cbind(0, solve(t(Z)%*%Z)))
beta1 <- ginv1%*%t(X)%*%y
beta2 <- ginv2%*%t(X)%*%y
beta1
##           [,1]
## [1,] 380.13679
## [2,] 159.19078
## [3,] 131.24750
## [4,]  89.69851
beta2
##                 [,1]
##               0.0000
## factlevel 1 539.3276
## factlevel 2 511.3843
## factlevel 3 469.8353
  1. Check which linear combinations are estimable
lambda1 <- c(0,1,-1,0)
lambda2 <- c(1,1,0,0)
lambda3 <- c(1,-1,0,0)
lambda4 <- c(0,1,0,0)
# you can check for estimability
qr(rbind(X,lambda1))$rank
## [1] 3
qr(rbind(X,lambda2))$rank
## [1] 3
qr(rbind(X,lambda3))$rank
## [1] 4
qr(rbind(X,lambda4))$rank
## [1] 4
  1. Verify the last statement is true
t(lambda1)%*%ginv1%*%t(X)%*%y
##          [,1]
## [1,] 27.94328
t(lambda1)%*%ginv2%*%t(X)%*%y
##          [,1]
## [1,] 27.94328
t(lambda2)%*%ginv1%*%t(X)%*%y
##          [,1]
## [1,] 539.3276
t(lambda2)%*%ginv2%*%t(X)%*%y
##          [,1]
## [1,] 539.3276
  1. Verify the equation
X%*%ginv1%*%t(X)-X%*%ginv2%*%t(X)
##               1            2            3            4             5
## 1  0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00  4.163336e-17
## 2  0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00  4.163336e-17
## 3  0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00  4.163336e-17
## 4  0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00  4.163336e-17
## 5  1.110223e-16 1.110223e-16 1.110223e-16 1.110223e-16  0.000000e+00
## 6  1.110223e-16 1.110223e-16 1.110223e-16 1.110223e-16  0.000000e+00
## 7  1.110223e-16 1.110223e-16 1.110223e-16 1.110223e-16  0.000000e+00
## 8  1.110223e-16 1.110223e-16 1.110223e-16 1.110223e-16  0.000000e+00
## 9  5.551115e-17 5.551115e-17 5.551115e-17 5.551115e-17 -1.387779e-17
## 10 5.551115e-17 5.551115e-17 5.551115e-17 5.551115e-17 -1.387779e-17
## 11 5.551115e-17 5.551115e-17 5.551115e-17 5.551115e-17 -1.387779e-17
## 12 5.551115e-17 5.551115e-17 5.551115e-17 5.551115e-17 -1.387779e-17
##                6             7             8             9            10
## 1   4.163336e-17  4.163336e-17  4.163336e-17  5.551115e-17  5.551115e-17
## 2   4.163336e-17  4.163336e-17  4.163336e-17  5.551115e-17  5.551115e-17
## 3   4.163336e-17  4.163336e-17  4.163336e-17  5.551115e-17  5.551115e-17
## 4   4.163336e-17  4.163336e-17  4.163336e-17  5.551115e-17  5.551115e-17
## 5   0.000000e+00  0.000000e+00  0.000000e+00 -1.387779e-17 -1.387779e-17
## 6   0.000000e+00  0.000000e+00  0.000000e+00 -1.387779e-17 -1.387779e-17
## 7   0.000000e+00  0.000000e+00  0.000000e+00 -1.387779e-17 -1.387779e-17
## 8   0.000000e+00  0.000000e+00  0.000000e+00 -1.387779e-17 -1.387779e-17
## 9  -1.387779e-17 -1.387779e-17 -1.387779e-17  5.551115e-17  5.551115e-17
## 10 -1.387779e-17 -1.387779e-17 -1.387779e-17  5.551115e-17  5.551115e-17
## 11 -1.387779e-17 -1.387779e-17 -1.387779e-17  5.551115e-17  5.551115e-17
## 12 -1.387779e-17 -1.387779e-17 -1.387779e-17  5.551115e-17  5.551115e-17
##               11            12
## 1   5.551115e-17  5.551115e-17
## 2   5.551115e-17  5.551115e-17
## 3   5.551115e-17  5.551115e-17
## 4   5.551115e-17  5.551115e-17
## 5  -1.387779e-17 -1.387779e-17
## 6  -1.387779e-17 -1.387779e-17
## 7  -1.387779e-17 -1.387779e-17
## 8  -1.387779e-17 -1.387779e-17
## 9   5.551115e-17  5.551115e-17
## 10  5.551115e-17  5.551115e-17
## 11  5.551115e-17  5.551115e-17
## 12  5.551115e-17  5.551115e-17
  1. Find a third linearly indep. combination
aux2 <- rbind(lambda1,lambda2)
qr(aux2)$rank
## [1] 2
aux3 <- rbind(lambda1,lambda2,lambda3)
qr(aux3)$rank
## [1] 3
  1. Verify the first one corresponds to the means model
lambda1 <- c(1,1,0,0)
lambda2 <- c(1,0,1,0)
lambda3 <- c(1,0,0,0)
Lambda=rbind(lambda1,lambda2,lambda3)
gama<-c(1,1,0,0) # arbitrary vector as long as U is non-singular
# gama<-c(1,1,0,0) # alternative 1
 gama<-c(0,0,1,1) # alternative 2
U <- rbind(Lambda,gama)
qr(U)$rank
## [1] 4
W<-solve(U)
W1<-W[,1:3]
eta <- W[,4]
X%*%eta # should be zero vector
##    [,1]
## 1     0
## 2     0
## 3     0
## 4     0
## 5     0
## 6     0
## 7     0
## 8     0
## 9     1
## 10    1
## 11    1
## 12    1
Z1 <- X%*%W1
qr(Z1)$rank # new design matrix
## [1] 3
t(W)%*%t(X)%*%X%*%W # check it diagonalises X'X
##         lambda1 lambda2 lambda3 gama
## lambda1       4       0       0    0
## lambda2       0       8      -8   -4
## lambda3       0      -8      16    8
## gama          0      -4       8    4
# second reparametrisation
# uses eigenvectors to guarantee diagonalisation

E <- t(eigen(t(X)%*%X)$vectors)
Lambda=E[1:3,]
gama<-E[4,]
U<-rbind(Lambda, gama)
qr(U)$rank
## [1] 4
W<-solve(U)
W1<-W[,1:3]
X%*%eta # should be zero vector
##    [,1]
## 1     0
## 2     0
## 3     0
## 4     0
## 5     0
## 6     0
## 7     0
## 8     0
## 9     1
## 10    1
## 11    1
## 12    1
Z2 <- X%*%W1
qr(Z2)$rank # new design matrix
## [1] 3
t(W)%*%t(X)%*%X%*%W # check it diagonalises X'X
##                                                       gama
##       1.600000e+01 1.415592e-16 -6.331015e-17 3.877810e-15
##       3.538981e-17 4.000000e+00  6.780619e-16 9.694524e-16
##      -4.278674e-16 6.546001e-16  4.000000e+00 5.253632e-16
## gama  2.307555e-15 6.879111e-16  4.658665e-16 4.807121e-31
  1. Verify
# enough to check the vectors
d1 <- y-X%*%ginv1%*%t(X)%*%y
d2 <- y-X%*%ginv2%*%t(X)%*%y
d1-d2
##             [,1]
## 1  -2.273737e-13
## 2  -2.273737e-13
## 3  -2.273737e-13
## 4  -2.273737e-13
## 5  -2.273737e-13
## 6  -2.273737e-13
## 7  -2.273737e-13
## 8  -2.273737e-13
## 9  -2.273737e-13
## 10 -2.273737e-13
## 11 -2.273737e-13
## 12 -2.273737e-13
  1. Compute F and compare to aov in R
SSE <- t(d1)%*%d1
d0 <- y-mean(y)
SSE0 <- t(d0)%*%d0
FF<-((n-t)/(t-1))*(SSE0-SSE)/SSE
FF
##          [,1]
## [1,] 77.39836
aov1<-aov(response~treatment,crd)
summary(aov1)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## treatment    2   9782    4891    77.4 2.14e-06 ***
## Residuals    9    569      63                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1