Libraries
library(tidyverse)
url_data <- "http://people.bath.ac.uk/kai21/MA50259/Data/COdata.txt"
COdata <- url_data %>% read_delim(delim=" ", col_types=list(col_character(), col_character(), col_integer()))
library(MASS)
# create factors
COdata <- COdata %>% mutate(Eth=as.factor(Eth))
COdata <- COdata %>% mutate(Ratio=as.factor(Ratio))
# create interactions
inter <- interaction(COdata$Eth, COdata$Ratio, sep = ":")
X1 <- model.matrix(~ Eth-1, data=COdata)
X2 <- model.matrix(~ Ratio-1, data=COdata)
X3 <- model.matrix(~ inter-1)
X <- cbind(1, X1, X2, X3)
colnames(X)[1]<- "(intercept)"
X
## (intercept) Eth0.1 Eth0.2 Eth0.3 Ratio14 Ratio15 Ratio16 inter0.1:14
## 1 1 1 0 0 1 0 0 1
## 2 1 1 0 0 0 1 0 0
## 3 1 1 0 0 0 0 1 0
## 4 1 0 1 0 1 0 0 0
## 5 1 0 1 0 0 1 0 0
## 6 1 0 1 0 0 0 1 0
## 7 1 0 0 1 1 0 0 0
## 8 1 0 0 1 0 1 0 0
## 9 1 0 0 1 0 0 1 0
## 10 1 1 0 0 1 0 0 1
## 11 1 1 0 0 0 1 0 0
## 12 1 1 0 0 0 0 1 0
## 13 1 0 1 0 1 0 0 0
## 14 1 0 1 0 0 1 0 0
## 15 1 0 1 0 0 0 1 0
## 16 1 0 0 1 1 0 0 0
## 17 1 0 0 1 0 1 0 0
## 18 1 0 0 1 0 0 1 0
## inter0.2:14 inter0.3:14 inter0.1:15 inter0.2:15 inter0.3:15 inter0.1:16
## 1 0 0 0 0 0 0
## 2 0 0 1 0 0 0
## 3 0 0 0 0 0 1
## 4 1 0 0 0 0 0
## 5 0 0 0 1 0 0
## 6 0 0 0 0 0 0
## 7 0 1 0 0 0 0
## 8 0 0 0 0 1 0
## 9 0 0 0 0 0 0
## 10 0 0 0 0 0 0
## 11 0 0 1 0 0 0
## 12 0 0 0 0 0 1
## 13 1 0 0 0 0 0
## 14 0 0 0 1 0 0
## 15 0 0 0 0 0 0
## 16 0 1 0 0 0 0
## 17 0 0 0 0 1 0
## 18 0 0 0 0 0 0
## inter0.2:16 inter0.3:16
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 1 0
## 7 0 0
## 8 0 0
## 9 0 1
## 10 0 0
## 11 0 0
## 12 0 0
## 13 0 0
## 14 0 0
## 15 1 0
## 16 0 0
## 17 0 0
## 18 0 1
Z <- model.matrix(~ Eth*Ratio, data = COdata)
y <- COdata$CO
y
## [1] 66 72 68 78 80 66 90 75 60 62 67 66 81 81 69 94 78 58
qr(X)$rank
## [1] 9
A <- unname(t(X)%*%X) # remove row/column names
G <- ginv(t(X)%*%X)
beta.est <- G%*%t(X)%*%y
beta.est
## [,1]
## [1,] 40.96875
## [2,] 9.15625
## [3,] 15.90625
## [4,] 15.90625
## [5,] 17.90625
## [6,] 15.65625
## [7,] 7.40625
## [8,] -4.03125
## [9,] 4.71875
## [10,] 17.21875
## [11,] 3.71875
## [12,] 7.96875
## [13,] 3.96875
## [14,] 9.46875
## [15,] 3.21875
## [16,] -5.28125
# now we verify that we have solved the normal equations.
unname(t(X)%*%X%*%beta.est-t(X)%*%y)
## [,1]
## [1,] 0.000000e+00
## [2,] 5.684342e-14
## [3,] 1.705303e-13
## [4,] 0.000000e+00
## [5,] 1.705303e-13
## [6,] 0.000000e+00
## [7,] 0.000000e+00
## [8,] 5.684342e-14
## [9,] 8.526513e-14
## [10,] 0.000000e+00
## [11,] 0.000000e+00
## [12,] 5.684342e-14
## [13,] 0.000000e+00
## [14,] -2.842171e-14
## [15,] 2.842171e-14
## [16,] -2.842171e-14
# matrix with rows equal to estimable coefficients
Lambda <- X[1:9,]
# estimates are simple lambda'*beta.est
Lambda%*%beta.est
## [,1]
## 1 64.0
## 2 69.5
## 3 67.0
## 4 79.5
## 5 80.5
## 6 67.5
## 7 92.0
## 8 76.5
## 9 59.0
# we can confirm this calculation by calculating the actual means by hand
means <- rep(0,9)
for (i in 1:9){
means[i]<-mean(y[c(i,9+i)])
}
means
## [1] 64.0 69.5 67.0 79.5 80.5 67.5 92.0 76.5 59.0
# rank should be equal to rank of X
qr(Lambda)$rank
## [1] 9
mod.factorial<-lm(CO~Eth*Ratio, data = COdata)
summary(mod.factorial)
##
## Call:
## lm(formula = CO ~ Eth * Ratio, data = COdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5 -1.5 0.0 1.5 2.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 64.000 1.607 39.819 1.98e-11 ***
## Eth0.2 15.500 2.273 6.819 7.74e-05 ***
## Eth0.3 28.000 2.273 12.318 6.16e-07 ***
## Ratio15 5.500 2.273 2.420 0.038631 *
## Ratio16 3.000 2.273 1.320 0.219477
## Eth0.2:Ratio15 -4.500 3.215 -1.400 0.195062
## Eth0.3:Ratio15 -21.000 3.215 -6.533 0.000107 ***
## Eth0.2:Ratio16 -15.000 3.215 -4.666 0.001175 **
## Eth0.3:Ratio16 -36.000 3.215 -11.199 1.38e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.273 on 9 degrees of freedom
## Multiple R-squared: 0.9727, Adjusted R-squared: 0.9483
## F-statistic: 40.02 on 8 and 9 DF, p-value: 3.861e-06
# 7 coumns removed
Z <- model.matrix(CO~Eth*Ratio, data = COdata)
AA <- t(Z)%*%Z
# new design matrix should be invertible
beta.est.2<-solve(AA)%*%t(Z)%*%y
means.est<-Z%*%beta.est.2
means.est
## [,1]
## 1 64.0
## 2 69.5
## 3 67.0
## 4 79.5
## 5 80.5
## 6 67.5
## 7 92.0
## 8 76.5
## 9 59.0
## 10 64.0
## 11 69.5
## 12 67.0
## 13 79.5
## 14 80.5
## 15 67.5
## 16 92.0
## 17 76.5
## 18 59.0
with(COdata, (interaction.plot(Eth, Ratio, CO, type = "b",
pch = c(18,24,22), leg.bty = "o",
main = "Interaction Plot of Ethanol and air/fuel ratio",
xlab = "Ethanol", ylab = "CO emissions")))
## NULL
d <- y-X%*%G%*%t(X)%*%y
SSE <- t(d)%*%d
SSE
## [,1]
## [1,] 46.5
d1<-y-X1%*%solve(t(X1)%*%X1)%*%t(X1)%*%y
d2<-y-X2%*%solve(t(X2)%*%X2)%*%t(X2)%*%y
XX3 <- model.matrix(~Eth+Ratio, data = COdata)
d3<-y-XX3%*%solve(t(XX3)%*%XX3)%*%t(XX3)%*%y
SS1 <- t(d1)%*%d1
SS2 <- t(d2)%*%d2
SS3 <- t(d3)%*%d3
SS2 - SS3
## [,1]
## [1,] 324
SS1 - SS3
## [,1]
## [1,] 652
SS3 - SSE
## [,1]
## [1,] 678
SSE
## [,1]
## [1,] 46.5
mod1 <- aov( CO ~ Eth * Ratio, data = COdata)
summary(mod1)
## Df Sum Sq Mean Sq F value Pr(>F)
## Eth 2 324.0 162.0 31.36 8.79e-05 ***
## Ratio 2 652.0 326.0 63.10 5.07e-06 ***
## Eth:Ratio 4 678.0 169.5 32.81 2.24e-05 ***
## Residuals 9 46.5 5.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1