Libraries

library(tidyverse)
url_data <- "http://people.bath.ac.uk/kai21/MA50259/Data/COdata.txt"
COdata <- url_data %>% read_delim(delim=" ", col_types=list(col_character(), col_character(), col_integer()))
  1. Load packages
library(MASS)
  1. Construct design matrix
# create factors
COdata <- COdata %>% mutate(Eth=as.factor(Eth))
COdata <- COdata %>% mutate(Ratio=as.factor(Ratio))
# create interactions
inter <- interaction(COdata$Eth, COdata$Ratio, sep = ":")

X1 <- model.matrix(~ Eth-1, data=COdata)
X2 <- model.matrix(~ Ratio-1, data=COdata)
X3 <- model.matrix(~ inter-1)

X <- cbind(1, X1, X2, X3)
colnames(X)[1]<- "(intercept)"
X
##    (intercept) Eth0.1 Eth0.2 Eth0.3 Ratio14 Ratio15 Ratio16 inter0.1:14
## 1            1      1      0      0       1       0       0           1
## 2            1      1      0      0       0       1       0           0
## 3            1      1      0      0       0       0       1           0
## 4            1      0      1      0       1       0       0           0
## 5            1      0      1      0       0       1       0           0
## 6            1      0      1      0       0       0       1           0
## 7            1      0      0      1       1       0       0           0
## 8            1      0      0      1       0       1       0           0
## 9            1      0      0      1       0       0       1           0
## 10           1      1      0      0       1       0       0           1
## 11           1      1      0      0       0       1       0           0
## 12           1      1      0      0       0       0       1           0
## 13           1      0      1      0       1       0       0           0
## 14           1      0      1      0       0       1       0           0
## 15           1      0      1      0       0       0       1           0
## 16           1      0      0      1       1       0       0           0
## 17           1      0      0      1       0       1       0           0
## 18           1      0      0      1       0       0       1           0
##    inter0.2:14 inter0.3:14 inter0.1:15 inter0.2:15 inter0.3:15 inter0.1:16
## 1            0           0           0           0           0           0
## 2            0           0           1           0           0           0
## 3            0           0           0           0           0           1
## 4            1           0           0           0           0           0
## 5            0           0           0           1           0           0
## 6            0           0           0           0           0           0
## 7            0           1           0           0           0           0
## 8            0           0           0           0           1           0
## 9            0           0           0           0           0           0
## 10           0           0           0           0           0           0
## 11           0           0           1           0           0           0
## 12           0           0           0           0           0           1
## 13           1           0           0           0           0           0
## 14           0           0           0           1           0           0
## 15           0           0           0           0           0           0
## 16           0           1           0           0           0           0
## 17           0           0           0           0           1           0
## 18           0           0           0           0           0           0
##    inter0.2:16 inter0.3:16
## 1            0           0
## 2            0           0
## 3            0           0
## 4            0           0
## 5            0           0
## 6            1           0
## 7            0           0
## 8            0           0
## 9            0           1
## 10           0           0
## 11           0           0
## 12           0           0
## 13           0           0
## 14           0           0
## 15           1           0
## 16           0           0
## 17           0           0
## 18           0           1
Z <- model.matrix(~ Eth*Ratio, data = COdata)
y <- COdata$CO
y
##  [1] 66 72 68 78 80 66 90 75 60 62 67 66 81 81 69 94 78 58
  1. Find the rank of X
qr(X)$rank
## [1] 9
  1. Compute the Moore-Penrose inverse
A <- unname(t(X)%*%X) # remove row/column names
G <- ginv(t(X)%*%X)
  1. Find a solution to the normal eqs. and verify it actually solves them.
beta.est <- G%*%t(X)%*%y
beta.est
##           [,1]
##  [1,] 40.96875
##  [2,]  9.15625
##  [3,] 15.90625
##  [4,] 15.90625
##  [5,] 17.90625
##  [6,] 15.65625
##  [7,]  7.40625
##  [8,] -4.03125
##  [9,]  4.71875
## [10,] 17.21875
## [11,]  3.71875
## [12,]  7.96875
## [13,]  3.96875
## [14,]  9.46875
## [15,]  3.21875
## [16,] -5.28125
# now we verify that we have solved the normal equations.
unname(t(X)%*%X%*%beta.est-t(X)%*%y)
##                [,1]
##  [1,]  0.000000e+00
##  [2,]  5.684342e-14
##  [3,]  1.705303e-13
##  [4,]  0.000000e+00
##  [5,]  1.705303e-13
##  [6,]  0.000000e+00
##  [7,]  0.000000e+00
##  [8,]  5.684342e-14
##  [9,]  8.526513e-14
## [10,]  0.000000e+00
## [11,]  0.000000e+00
## [12,]  5.684342e-14
## [13,]  0.000000e+00
## [14,] -2.842171e-14
## [15,]  2.842171e-14
## [16,] -2.842171e-14
  1. Find unbiased estimates for each of the nine means in the above model.
# matrix with rows equal to estimable coefficients 
Lambda <- X[1:9,]
# estimates are simple lambda'*beta.est
Lambda%*%beta.est
##   [,1]
## 1 64.0
## 2 69.5
## 3 67.0
## 4 79.5
## 5 80.5
## 6 67.5
## 7 92.0
## 8 76.5
## 9 59.0
# we can confirm this calculation by calculating the actual means by hand
means <- rep(0,9)
for (i in 1:9){
  means[i]<-mean(y[c(i,9+i)])
}
means
## [1] 64.0 69.5 67.0 79.5 80.5 67.5 92.0 76.5 59.0
  1. Verify the 9 estiable linear combos. are linearly independent.
# rank should be equal to rank of X
qr(Lambda)$rank
## [1] 9
  1. Compare answers
mod.factorial<-lm(CO~Eth*Ratio, data = COdata)
summary(mod.factorial)
## 
## Call:
## lm(formula = CO ~ Eth * Ratio, data = COdata)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##   -2.5   -1.5    0.0    1.5    2.5 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      64.000      1.607  39.819 1.98e-11 ***
## Eth0.2           15.500      2.273   6.819 7.74e-05 ***
## Eth0.3           28.000      2.273  12.318 6.16e-07 ***
## Ratio15           5.500      2.273   2.420 0.038631 *  
## Ratio16           3.000      2.273   1.320 0.219477    
## Eth0.2:Ratio15   -4.500      3.215  -1.400 0.195062    
## Eth0.3:Ratio15  -21.000      3.215  -6.533 0.000107 ***
## Eth0.2:Ratio16  -15.000      3.215  -4.666 0.001175 ** 
## Eth0.3:Ratio16  -36.000      3.215 -11.199 1.38e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.273 on 9 degrees of freedom
## Multiple R-squared:  0.9727, Adjusted R-squared:  0.9483 
## F-statistic: 40.02 on 8 and 9 DF,  p-value: 3.861e-06
# 7 coumns removed

Z <- model.matrix(CO~Eth*Ratio, data = COdata)
AA <- t(Z)%*%Z
# new design matrix should be invertible
beta.est.2<-solve(AA)%*%t(Z)%*%y
means.est<-Z%*%beta.est.2
means.est
##    [,1]
## 1  64.0
## 2  69.5
## 3  67.0
## 4  79.5
## 5  80.5
## 6  67.5
## 7  92.0
## 8  76.5
## 9  59.0
## 10 64.0
## 11 69.5
## 12 67.0
## 13 79.5
## 14 80.5
## 15 67.5
## 16 92.0
## 17 76.5
## 18 59.0
  1. Plot conclusions?
with(COdata, (interaction.plot(Eth, Ratio, CO, type = "b",
  pch = c(18,24,22), leg.bty = "o",
  main = "Interaction Plot of Ethanol and air/fuel ratio",
  xlab = "Ethanol", ylab = "CO emissions")))

## NULL
  1. Compute the sum of squares for the CO data model
d <- y-X%*%G%*%t(X)%*%y
SSE <- t(d)%*%d
SSE
##      [,1]
## [1,] 46.5
  1. Testing hypotheses
d1<-y-X1%*%solve(t(X1)%*%X1)%*%t(X1)%*%y
d2<-y-X2%*%solve(t(X2)%*%X2)%*%t(X2)%*%y
XX3 <- model.matrix(~Eth+Ratio, data = COdata)
d3<-y-XX3%*%solve(t(XX3)%*%XX3)%*%t(XX3)%*%y

SS1 <- t(d1)%*%d1
SS2 <- t(d2)%*%d2
SS3 <- t(d3)%*%d3

SS2 - SS3
##      [,1]
## [1,]  324
SS1 - SS3
##      [,1]
## [1,]  652
SS3 - SSE
##      [,1]
## [1,]  678
SSE
##      [,1]
## [1,] 46.5
mod1 <- aov( CO ~ Eth * Ratio, data = COdata)
summary(mod1)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Eth          2  324.0   162.0   31.36 8.79e-05 ***
## Ratio        2  652.0   326.0   63.10 5.07e-06 ***
## Eth:Ratio    4  678.0   169.5   32.81 2.24e-05 ***
## Residuals    9   46.5     5.2                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1