In this note we’re going to explore interactions and contrasts in linear models.
We’ll download the data from Scientific Reports by Jonas O. Wolff and Stanislav N. Gorb. It’s looking at the four pair of legs (\(Li\) for each \(i\in{1,..,4}\)) (first column) on a spider and the frictional coefficient of the legs (second column) with respect to some interaction (pull or push)(third column).
#install.packages("downloader","ggplot2")
#library(downloader,ggplot2)
url<- "https://raw.githubusercontent.com/genomicsclass/dagdata/master/inst/extdata/spider_wolff_gorb_2013.csv"
filname<-"spider_wolff_gorb_2013.csv"
if (!file.exists(filname)) download(url,filname)
spider<-read.csv(filname, skip=1)
head(spider)
nrow(spider)
[1] 282
We’d like to recreate the box plot is in the paper. It looks like this:
boxplot(spider$friction~spider$type*spider$leg,
col=c("grey90","grey40"),las=2,
main="Comparision of Friction Coefficients of Different Pair Legs", xlab="Legs",ylab="Friction Coefficient")
spider.sub<-spider[spider$leg=="L1",]
fit<-lm(friction~type,data=spider.sub)
summary(fit)
Call:
lm(formula = friction ~ type, data = spider.sub)
Residuals:
Min 1Q Median 3Q Max
-0.33147 -0.10735 -0.04941 -0.00147 0.76853
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.92147 0.03827 24.078 < 2e-16 ***
typepush -0.51412 0.05412 -9.499 5.7e-14 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.2232 on 66 degrees of freedom
Multiple R-squared: 0.5776, Adjusted R-squared: 0.5711
F-statistic: 90.23 on 1 and 66 DF, p-value: 5.698e-14
(coefs<-coef(fit))
(Intercept) typepush
0.9214706 -0.5141176
Interpreting the above results:
mean(spider.sub[spider.sub$type=="pull",]$friction)
[1] 0.9214706
mean(spider.sub[spider.sub$type=="push",]$friction)-mean(spider.sub[spider.sub$type=="pull",]$friction)
[1] -0.5141176
#install.packages("devtools")
#library(devtools)
#install_github("ririzarr/rafalib")
#library(rafalib)
X<-model.matrix(~type,data=spider.sub) #pull=0,push=1
colnames(X)
[1] "(Intercept)" "typepush"
head(X)
(Intercept) typepush
1 1 0
2 1 0
3 1 0
4 1 0
5 1 0
6 1 0
imagemat(X, main="Model Matrix for Linear Model with Interactions")
Graphically:
install.packages("ggplot2")
Error in install.packages : Updating loaded packages
library(ggplot2)
p<-ggplot(spider.sub)
p<-p+aes(x=type,y=friction)
p<-p+geom_point()+geom_jitter(width=0.1)+geom_smooth(method="loess",formula = "y ~ x")
p<-p+ylab("Friction Coefficient")+xlab("Movement Type")
p
Now we’re going to get the linear models that are slightly more interesting. So now we are going to have two variables in the formula:
X<-model.matrix(~type+leg,data=spider)
colnames(X) #intercept, typepush, and the three non-reference level of leg (L1)
[1] "(Intercept)" "typepush" "legL2" "legL3" "legL4"
head(X)
(Intercept) typepush legL2 legL3 legL4
1 1 0 0 0 0
2 1 0 0 0 0
3 1 0 0 0 0
4 1 0 0 0 0
5 1 0 0 0 0
6 1 0 0 0 0
imagemat(X,main="Model Matrix for Linear Model with 2 Factors")
fit2<-lm(friction~type+leg,data=spider)
summary(fit2)
Call:
lm(formula = friction ~ type + leg, data = spider)
Residuals:
Min 1Q Median 3Q Max
-0.46392 -0.13441 -0.00525 0.10547 0.69509
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.05392 0.02816 37.426 < 2e-16 ***
typepush -0.77901 0.02482 -31.380 < 2e-16 ***
legL2 0.17192 0.04569 3.763 0.000205 ***
legL3 0.16049 0.03251 4.937 1.37e-06 ***
legL4 0.28134 0.03438 8.183 1.01e-14 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.2084 on 277 degrees of freedom
Multiple R-squared: 0.7916, Adjusted R-squared: 0.7886
F-statistic: 263 on 4 and 277 DF, p-value: < 2.2e-16
(coefs<-coef(fit2))
(Intercept) typepush legL2 legL3 legL4
1.0539153 -0.7790071 0.1719216 0.1604921 0.2813382
spider$group<-factor(paste0(spider$leg,spider$type))
p<-ggplot(spider)
p<-p+aes(x=group,y=friction)
p<-p+geom_point()+geom_jitter(width=0.1)
p
#install.packages("RColorBrewer")
#library(RColorBrewer)
a<- -0.25
lgth<- .1
stripchart(split(spider$friction,spider$group),
vertical=T,pch=1, method = "jitter",las=2,xlim=c(0,11),ylim=c(0,2))
cols<-brewer.pal(5,"Dark2")
abline(h=0)
arrows(1+a,0,1+a,coefs[1],lwd=3,col=cols[1],length = lgth)
abline(h=coefs[1],col=cols[1])
arrows(3+a,coefs[1],3+a,coefs[1]+coefs[3],lwd=3,col=cols[3],length = lgth)
arrows(5+a,coefs[1],5+a,coefs[1]+coefs[4],lwd=4,col=cols[4],length = lgth)
arrows(7+a,coefs[1],7+a,coefs[1]+coefs[5],lwd=5,col=cols[5],length = lgth)
arrows(2+a,coefs[1],2+a,coefs[1]+coefs[3]+coefs[2],lwd=3,col=cols[2],length = lgth)
segments(3+a,coefs[1]+coefs[3],4+a,coefs[1]+coefs[3],lwd=3,col=cols[3])
arrows(4+a,coefs[1]+coefs[3],4+a,coefs[1]+coefs[3]+coefs[2],lwd=3,col=cols[2],length = lgth)
segments(5+a,coefs[1]+coefs[4],6+a,coefs[1]+coefs[4],lwd=3,col=cols[4])
arrows(6+a,coefs[1]+coefs[4],6+a,coefs[1]+coefs[4]+coefs[2],lwd=3,col=cols[2],length = lgth)
segments(7+a,coefs[1]+coefs[5],8+a,coefs[1]+coefs[5],lwd=3,col=cols[5])
arrows(8+a,coefs[1]+coefs[5],8+a,coefs[1]+coefs[5]+coefs[2],lwd=3,col=cols[2],length = lgth)
legend("right",names(coefs),fill = cols,cex=0.7,bg="beige")
Note that
mean(spider[spider$group=="L1pull",]$friction)
[1] 0.9214706
This is because when we had 2 coefficients and 2 groups, the coefficients could be such that they predicted the exact mean of the two groups. And now we have 8 groups and only 5 coefficients.
Finally, let’s to see how to compare two groups where one of them is not the reference level.
For instance, we want to compare the leg L2 with L3 effect. The way to do that is to use the “contrast”
L3vsL2.equiv<-contrast(fit2,list(leg="L3",type="push"),list(leg="L2",type="push"))
L3vsL2.equiv
lm model parameter contrast
Contrast S.E. Lower Upper t df Pr(>|t|)
-0.01142949 0.04319685 -0.0964653 0.07360632 -0.26 277 0.7915
L3vsL2.equiv$X
(Intercept) typepush legL2 legL3 legL4
1 0 0 -1 1 0
attr(,"assign")
[1] 0 1 2 2 2
attr(,"contrasts")
attr(,"contrasts")$type
[1] "contr.treatment"
attr(,"contrasts")$leg
[1] "contr.treatment"
Contrasts Exercises
Suppose we have an experiment with two species A and B, and two conditions: control and treated.
species <- factor(c("A","A","B","B"))
condition <- factor(c("control","treated","control","treated"))
\[\begin{matrix}\text{species}(A)=0, & \text{species}(B)=1\\ \text{condition}(control)=0, & \text{condition}(trated)=1 \end{matrix}\]
And we will use a formula of ~ species + condition.
X<-model.matrix(~ species + condition)
X
(Intercept) speciesB conditiontreated
1 1 0 0
2 1 0 1
3 1 1 0
4 1 1 1
attr(,"assign")
[1] 0 1 2
attr(,"contrasts")
attr(,"contrasts")$species
[1] "contr.treatment"
attr(,"contrasts")$condition
[1] "contr.treatment"
Contrasts Exercises #1
Suppose we want to build a contrast of coefficients for the above experimental design.
You can either figure this question out through logic, by looking at the design matrix, or using the contrast() function from the contrast library. If you have not done so already, you should download the contrast library. The contrast vector is returned as contrast()$X.
What should the contrast vector be, for the contrast of (species=B and condition=control) vs (species=A and condition=treatment)? Assume that the beta vector from the model fit by R is: Intercept, speciesB, conditiontreated.
Y<-matrix(rnorm(4),ncol=1)
fit4<-lm(Y~species+condition)
summary(fit4)
Call:
lm(formula = Y ~ species + condition)
Residuals:
1 2 3 4
0.2285 -0.2285 -0.2285 0.2285
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.03171 0.39571 -0.080 0.949
speciesB -0.50565 0.45693 -1.107 0.468
conditiontreated -0.84439 0.45693 -1.848 0.316
Residual standard error: 0.4569 on 1 degrees of freedom
Multiple R-squared: 0.8227, Adjusted R-squared: 0.468
F-statistic: 2.32 on 2 and 1 DF, p-value: 0.4211
contrast(fit4,list(species="B",condition="control"),list(species="A",condition="treated"))$X
(Intercept) speciesB conditiontreated
1 0 1 -1
attr(,"assign")
[1] 0 1 2
attr(,"contrasts")
attr(,"contrasts")$species
[1] "contr.treatment"
attr(,"contrasts")$condition
[1] "contr.treatment"
Contrasts Exercises #2
Suppose we build a model using two variables: ~ type + leg.
What is the t-value for the contrast of leg pair L4 vs leg pair L2?
L4vsL2<-contrast(fit2,list(leg="L4",type="pull"),list(leg="L2",type="pull"))
L4vsL2
lm model parameter contrast
Contrast S.E. Lower Upper t df Pr(>|t|)
0.1094167 0.04462392 0.02157158 0.1972618 2.45 277 0.0148
L4vsL2$X
(Intercept) typepush legL2 legL3 legL4
1 0 0 -1 0 1
attr(,"assign")
[1] 0 1 2 2 2
attr(,"contrasts")
attr(,"contrasts")$type
[1] "contr.treatment"
attr(,"contrasts")$leg
[1] "contr.treatment"
Now we can use a linear model with interactions.
X<-model.matrix(~type+leg+type:leg,data=spider)
colnames(X)
[1] "(Intercept)" "typepush" "legL2" "legL3" "legL4"
[6] "typepush:legL2" "typepush:legL3" "typepush:legL4"
head(X)
(Intercept) typepush legL2 legL3 legL4 typepush:legL2 typepush:legL3 typepush:legL4
1 1 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0
3 1 0 0 0 0 0 0 0
4 1 0 0 0 0 0 0 0
5 1 0 0 0 0 0 0 0
6 1 0 0 0 0 0 0 0
imagemat(X, main="Matrix Model for Linear Model with Interations")
fit5<-lm(friction~type+leg+type:leg,data=spider)
summary(fit5)
Call:
lm(formula = friction ~ type + leg + type:leg, data = spider)
Residuals:
Min 1Q Median 3Q Max
-0.46385 -0.10735 -0.01111 0.07848 0.76853
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.92147 0.03266 28.215 < 2e-16 ***
typepush -0.51412 0.04619 -11.131 < 2e-16 ***
legL2 0.22386 0.05903 3.792 0.000184 ***
legL3 0.35238 0.04200 8.390 2.62e-15 ***
legL4 0.47928 0.04442 10.789 < 2e-16 ***
typepush:legL2 -0.10388 0.08348 -1.244 0.214409
typepush:legL3 -0.38377 0.05940 -6.461 4.73e-10 ***
typepush:legL4 -0.39588 0.06282 -6.302 1.17e-09 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.1904 on 274 degrees of freedom
Multiple R-squared: 0.8279, Adjusted R-squared: 0.8235
F-statistic: 188.3 on 7 and 274 DF, p-value: < 2.2e-16
(coefs<-coef(fit5))
(Intercept) typepush legL2 legL3 legL4
0.9214706 -0.5141176 0.2238627 0.3523756 0.4792794
typepush:legL2 typepush:legL3 typepush:legL4
-0.1038824 -0.3837670 -0.3958824
stripchart(split(spider$friction,spider$group),
vertical=T,pch=1, method = "jitter",las=2,xlim=c(0,11),ylim=c(0,2))
cols<-brewer.pal(8,"Dark2")
abline(h=0)
arrows(1+a,0,1+a,coefs[1],lwd=3,col=cols[1],length = lgth)
abline(h=coefs[1],col=cols[1])
arrows(2+a,coefs[1],2+a,coefs[1]+coefs[2],lwd=3,col=cols[2],length = lgth)
arrows(3+a,coefs[1],3+a,coefs[1]+coefs[3],lwd=3,col=cols[3],length = lgth)
segments(3+a,coefs[1]+coefs[3],4+a,coefs[1]+coefs[3],lwd=3,col=cols[3])
arrows(4+a,coefs[1]+coefs[3],4+a,coefs[1]+coefs[3]+coefs[2],lwd=3,col=cols[2],length = lgth)
arrows(4+a,coefs[1]+coefs[2]+coefs[3],4+a,coefs[1]+coefs[3]+coefs[2]+coefs[6],lwd=3,col=cols[6],length = lgth)
arrows(5+a,coefs[1],5+a,coefs[1]+coefs[4],lwd=4,col=cols[4],length = lgth)
segments(5+a,coefs[1]+coefs[4],6+a,coefs[1]+coefs[4],lwd=3,col=cols[4])
arrows(6+a,coefs[1]+coefs[4],6+a,coefs[1]+coefs[4]+coefs[2],lwd=3,col=cols[2],length = lgth)
arrows(6+a,coefs[1]+coefs[2]+coefs[4],6+a,coefs[1]+coefs[4]+coefs[2]+coefs[7],lwd=3,col=cols[7],length = lgth)
arrows(7+a,coefs[1],7+a,coefs[1]+coefs[5],lwd=5,col=cols[5],length = lgth)
segments(7+a,coefs[1]+coefs[5],8+a,coefs[1]+coefs[5],lwd=3,col=cols[5])
arrows(8+a,coefs[1]+coefs[5],8+a,coefs[1]+coefs[5]+coefs[2],lwd=3,col=cols[2],length = lgth)
arrows(8+a,coefs[1]+coefs[2]+coefs[5],8+a,coefs[1]+coefs[5]+coefs[2]+coefs[8],lwd=3,col=cols[8],length = lgth)
legend("right",names(coefs),fill = cols,cex=0.7,bg="beige")