Designing Experiments and Analyzing Data: A Model Comparison Perspective (3rd edition)

R Code and Instructions to Accompany Chapter 9

These notes build on the prior chapters’ R Code and Instructions, where we discuss some operational aspects of R and detail the use of many functions and how to install packages. Here, we skip details that have already been discussed in the prior R Code and Instructions.

Load the AMCP package.

library(AMCP)
data(chapter_9_table_1)
chapter_9_table_1
##   Group X  Y
## 1     1 1  4
## 2     1 2  9
## 3     1 3  8
## 4     2 3 12
## 5     2 4 11
## 6     2 5 16
chapter_9_table_1$Group <- as.factor(chapter_9_table_1$Group)
str(chapter_9_table_1)
## 'data.frame':    6 obs. of  3 variables:
##  $ Group: Factor w/ 2 levels "1","2": 1 1 1 2 2 2
##  $ X    : int  1 2 3 3 4 5
##  $ Y    : int  4 9 8 12 11 16

Here we partition the data and perform two regressions, one for Group=1 and one for Group=1. Compare this output to Table 9.4.

lm.1 <- lm(Y ~ X, data=chapter_9_table_1[chapter_9_table_1$Group=="1",])
lm.2 <- lm(Y ~ X, data=chapter_9_table_1[chapter_9_table_1$Group=="2",])

summary(lm.1)
## 
## Call:
## lm(formula = Y ~ X, data = chapter_9_table_1[chapter_9_table_1$Group == 
##     "1", ])
## 
## Residuals:
##  1  2  3 
## -1  2 -1 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    3.000      3.742   0.802    0.570
## X              2.000      1.732   1.155    0.454
## 
## Residual standard error: 2.449 on 1 degrees of freedom
## Multiple R-squared:  0.5714, Adjusted R-squared:  0.1429 
## F-statistic: 1.333 on 1 and 1 DF,  p-value: 0.4544
summary(lm.2)
## 
## Call:
## lm(formula = Y ~ X, data = chapter_9_table_1[chapter_9_table_1$Group == 
##     "2", ])
## 
## Residuals:
##  4  5  6 
##  1 -2  1 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    5.000      7.071   0.707    0.608
## X              2.000      1.732   1.155    0.454
## 
## Residual standard error: 2.449 on 1 degrees of freedom
## Multiple R-squared:  0.5714, Adjusted R-squared:  0.1429 
## F-statistic: 1.333 on 1 and 1 DF,  p-value: 0.4544

Here we perform a linear model and ask for the ANOVA-based output (via anova()) and the regression-based output (via summary()). This yields, per the R default, Type I sums of squares.

ANOVA.9.1 <- lm(Y ~ Group, data=chapter_9_table_1)
ANCOVA.9.1 <- lm(Y ~ X+Group, data=chapter_9_table_1)

# ANOVA
anova(ANOVA.9.1)
## Analysis of Variance Table
## 
## Response: Y
##           Df Sum Sq Mean Sq F value  Pr(>F)  
## Group      1     54      54  7.7143 0.04995 *
## Residuals  4     28       7                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(ANOVA.9.1)
## 
## Call:
## lm(formula = Y ~ Group, data = chapter_9_table_1)
## 
## Residuals:
##  1  2  3  4  5  6 
## -3  2  1 -1 -2  3 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    7.000      1.528   4.583   0.0102 *
## Group2         6.000      2.160   2.777   0.0499 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.646 on 4 degrees of freedom
## Multiple R-squared:  0.6585, Adjusted R-squared:  0.5732 
## F-statistic: 7.714 on 1 and 4 DF,  p-value: 0.04995
# ANCOVA
anova(ANCOVA.9.1)
## Analysis of Variance Table
## 
## Response: Y
##           Df Sum Sq Mean Sq F value  Pr(>F)  
## X          1   67.6    67.6    16.9 0.02607 *
## Group      1    2.4     2.4     0.6 0.49503  
## Residuals  3   12.0     4.0                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(ANCOVA.9.1)
## 
## Call:
## lm(formula = Y ~ X + Group, data = chapter_9_table_1)
## 
## Residuals:
##  1  2  3  4  5  6 
## -1  2 -1  1 -2  1 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    3.000      2.309   1.299    0.285
## X              2.000      1.000   2.000    0.139
## Group2         2.000      2.582   0.775    0.495
## 
## Residual standard error: 2 on 3 degrees of freedom
## Multiple R-squared:  0.8537, Adjusted R-squared:  0.7561 
## F-statistic:  8.75 on 2 and 3 DF,  p-value: 0.05598

Using the Anova() function from the car package (see the Chapter 7 R code), Type III sums of squares can be obtained.

library(car)
Anova(ANCOVA.9.1, type="III")
## Anova Table (Type III tests)
## 
## Response: Y
##             Sum Sq Df F value Pr(>F)
## (Intercept)   6.75  1  1.6875 0.2848
## X            16.00  1  4.0000 0.1393
## Group         2.40  1  0.6000 0.4950
## Residuals    12.00  3

Because the ANOVA is nested within the ANCOVA model, a model comparison can be done using the anova() function, where two different (but nested) linear models are provided as input. This output evaluates if the extra term in the ANCOVA (i.e., the slope) has a statistically significant effect.

anova(ANOVA.9.1, ANCOVA.9.1)
## Analysis of Variance Table
## 
## Model 1: Y ~ Group
## Model 2: Y ~ X + Group
##   Res.Df RSS Df Sum of Sq  F Pr(>F)
## 1      4  28                       
## 2      3  12  1        16  4 0.1393

It is useful to find the adjusted means. We can compute adjusted means using the full model shown in Equation 9.25. The predict() function can be used, which we demonstrate in a step-by-step fasion, but this is only one way to obtain the adjusted means.

#Compute grand mean across groups on the covariate; duplicate the grand mean for the number of groups (here two).
X.Grand.Mean <- mean(chapter_9_table_1$X)
X.Grand.Mean
## [1] 3
# Create factor with levels equal to group number
Group.Predict <- factor(c(1,2), levels=c(1,2))
Group.Predict
## [1] 1 2
## Levels: 1 2
# Create a data frame with same factor and predictor variable structure as used in ANCOVA model previously
Group.Predict <- data.frame(Group = Group.Predict, X = rep(X.Grand.Mean, 2))
Group.Predict
##   Group X
## 1     1 3
## 2     2 3
# Use the ANCOVA model solution to make predictions of `Y` at the grand mean of `X`.
Adj.Means <- predict(ANCOVA.9.1, Group.Predict)
Adj.Means
##  1  2 
##  9 11
# Observed means on X (for comparison purposes)
Obs.Mean.X <- c(mean(chapter_9_table_1[chapter_9_table_1$Group==1, "X"]),
                mean(chapter_9_table_1[chapter_9_table_1$Group==2, "X"]))
Obs.Mean.X
## [1] 2 4
# Observed means on DV (for comparison purposes)
Obs.Mean.Y <- c(mean(chapter_9_table_1[chapter_9_table_1$Group==1, "Y"]),
                mean(chapter_9_table_1[chapter_9_table_1$Group==2, "Y"]))
Obs.Mean.Y
## [1]  7 13
Summary.of.Means = data.frame(Obs.Mean.X=Obs.Mean.X, X.Grand.Mean=X.Grand.Mean,
                              Adjusted.Group.Means.Y=Adj.Means, Obs.Mean.Y=Obs.Mean.Y)
Summary.of.Means
##   Obs.Mean.X X.Grand.Mean Adjusted.Group.Means.Y Obs.Mean.Y
## 1          2            3                      9          7
## 2          4            3                     11         13

Here we perform the numerical example with the Beck DePression data from Table 9.7

data(chapter_9_table_7)
chapter_9_table_7
##    Condition Pre Post
## 1          1  18   12
## 2          1  16    0
## 3          1  16   10
## 4          1  15    9
## 5          1  14    0
## 6          1  20   11
## 7          1  14    2
## 8          1  21    4
## 9          1  25   15
## 10         1  11   10
## 11         2  18   11
## 12         2  16    4
## 13         2  15   19
## 14         2  14   15
## 15         2  20    3
## 16         2  25   14
## 17         2  11   10
## 18         2  25   16
## 19         2  11   10
## 20         2  22   20
## 21         3  15   17
## 22         3  19   25
## 23         3  10   10
## 24         3  29   22
## 25         3  24   23
## 26         3  15   10
## 27         3   9    2
## 28         3  18   10
## 29         3  22   14
## 30         3  13    7
str(chapter_9_table_7)
## 'data.frame':    30 obs. of  3 variables:
##  $ Condition: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Pre      : int  18 16 16 15 14 20 14 21 25 11 ...
##  $ Post     : int  12 0 10 9 0 11 2 4 15 10 ...
chapter_9_table_7$Group <- chapter_9_table_7$Condition
chapter_9_table_7$Group[chapter_9_table_7$Group==1] <- "SSRI"
chapter_9_table_7$Group[chapter_9_table_7$Group==2] <- "Placebo"
chapter_9_table_7$Group[chapter_9_table_7$Group==3] <- "Wait List"

# Make Group a factor
chapter_9_table_7$Group <- as.factor(chapter_9_table_7$Group)

chapter_9_table_7
##    Condition Pre Post     Group
## 1          1  18   12      SSRI
## 2          1  16    0      SSRI
## 3          1  16   10      SSRI
## 4          1  15    9      SSRI
## 5          1  14    0      SSRI
## 6          1  20   11      SSRI
## 7          1  14    2      SSRI
## 8          1  21    4      SSRI
## 9          1  25   15      SSRI
## 10         1  11   10      SSRI
## 11         2  18   11   Placebo
## 12         2  16    4   Placebo
## 13         2  15   19   Placebo
## 14         2  14   15   Placebo
## 15         2  20    3   Placebo
## 16         2  25   14   Placebo
## 17         2  11   10   Placebo
## 18         2  25   16   Placebo
## 19         2  11   10   Placebo
## 20         2  22   20   Placebo
## 21         3  15   17 Wait List
## 22         3  19   25 Wait List
## 23         3  10   10 Wait List
## 24         3  29   22 Wait List
## 25         3  24   23 Wait List
## 26         3  15   10 Wait List
## 27         3   9    2 Wait List
## 28         3  18   10 Wait List
## 29         3  22   14 Wait List
## 30         3  13    7 Wait List
str(chapter_9_table_7)
## 'data.frame':    30 obs. of  4 variables:
##  $ Condition: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Pre      : int  18 16 16 15 14 20 14 21 25 11 ...
##  $ Post     : int  12 0 10 9 0 11 2 4 15 10 ...
##  $ Group    : Factor w/ 3 levels "Placebo","SSRI",..: 2 2 2 2 2 2 2 2 2 2 ...

Descriptives.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
group_by(chapter_9_table_7, Group) %>%
summarise(mean=mean(Pre), sd=sd(Pre), n=n())
## # A tibble: 3 × 4
##       Group  mean       sd     n
##      <fctr> <dbl>    <dbl> <int>
## 1   Placebo  17.7 5.207900    10
## 2      SSRI  17.0 4.082483    10
## 3 Wait List  17.4 6.310485    10
group_by(chapter_9_table_7, Group) %>%
summarise(mean=mean(Post), sd=sd(Post), n=n())
## # A tibble: 3 × 4
##       Group  mean       sd     n
##      <fctr> <dbl>    <dbl> <int>
## 1   Placebo  12.2 5.731007    10
## 2      SSRI   7.3 5.355164    10
## 3 Wait List  14.0 7.571878    10

Here we perform a linear model and ask for the ANOVA-based output (via anova()) and the regression-based output (via summary()). This yields, per the R default, Type I sums of squares. See the first part of Table 9.9.

Beck_ANOVA.Model <- lm(Post ~ Group, data=chapter_9_table_7)
Beck_ANCOVA.Model <- lm(Post~ Group + Pre, data=chapter_9_table_7)

# ANOVA
anova(Beck_ANOVA.Model)
## Analysis of Variance Table
## 
## Response: Post
##           Df  Sum Sq Mean Sq F value  Pr(>F)  
## Group      2  240.47 120.233  3.0348 0.06473 .
## Residuals 27 1069.70  39.619                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANCOVA
anova(Beck_ANCOVA.Model)
## Analysis of Variance Table
## 
## Response: Post
##           Df Sum Sq Mean Sq F value   Pr(>F)   
## Group      2 240.47  120.23  4.1332 0.027629 * 
## Pre        1 313.37  313.37 10.7723 0.002937 **
## Residuals 26 756.33   29.09                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Now, we perform the ANOVA using the Type III sums of squares. See the seCondition part of Table 9.9. Note that the model output from summary() differs from the book because of the way in which R vs. SAS code the factors.

Anova(Beck_ANCOVA.Model, type="III")
## Anova Table (Type III tests)
## 
## Response: Post
##             Sum Sq Df F value   Pr(>F)   
## (Intercept)   1.17  1  0.0403 0.842477   
## Group       217.15  2  3.7324 0.037584 * 
## Pre         313.37  1 10.7723 0.002937 **
## Residuals   756.33 26                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(Beck_ANCOVA.Model)
## 
## Call:
## lm(formula = Post ~ Group + Pre, data = chapter_9_table_7)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.6842  -3.9615   0.6448   3.8773   9.9675 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      0.7779     3.8756   0.201  0.84248   
## GroupSSRI       -4.4483     2.4160  -1.841  0.07703 . 
## GroupWait List   1.9936     2.4128   0.826  0.41617   
## Pre              0.6453     0.1966   3.282  0.00294 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.393 on 26 degrees of freedom
## Multiple R-squared:  0.4227, Adjusted R-squared:  0.3561 
## F-statistic: 6.346 on 3 and 26 DF,  p-value: 0.002252

Note that the reference level can be changed using the relevel() function, which we demonstrate here.

chapter_9_table_7.B <- chapter_9_table_7
chapter_9_table_7.B$Group <- relevel(chapter_9_table_7.B$Group, ref= "Wait List")

Beck_ANCOVA.Model2 <- lm(Post~ Group + Pre, data=chapter_9_table_7.B)
summary(Beck_ANCOVA.Model2)
## 
## Call:
## lm(formula = Post ~ Group + Pre, data = chapter_9_table_7.B)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.6842  -3.9615   0.6448   3.8773   9.9675 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    2.7715     3.8227   0.725  0.47492   
## GroupPlacebo  -1.9936     2.4128  -0.826  0.41617   
## GroupSSRI     -6.4419     2.4133  -2.669  0.01292 * 
## Pre            0.6453     0.1966   3.282  0.00294 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.393 on 26 degrees of freedom
## Multiple R-squared:  0.4227, Adjusted R-squared:  0.3561 
## F-statistic: 6.346 on 3 and 26 DF,  p-value: 0.002252

The book cover.

Here we show the code that was the basis for the cover of the book cover. The data below uses Rosenthal’s Pygmalion data like that used for Ch. 9, exercise 15, but include the mean of two Post measures.

library(plotrix)

Data <- rbind(
c(0,61.00,86.00,25.00),
c(0,79.00,82.00,3.00),
c(0,86.00,103.00,17.00),
c(0,91.00,90.00,-1.00),
c(0,92.00,105.00,13.00),
c(0,100.00,120.00,20.00),
c(0,111.00,105.00,-6.00),
c(1,60.00,97.00,37.00),
c(1,61.00,106.00,45.00),
c(1,84.00,107.00,23.00),
c(1,88.00,128.00,40.00),
c(1,95.00,113.00,18.00),
c(1,100.00,108.00,8.00),
c(1,116.00,137.00,21.00))
colnames(Data) <- c("Treatment", "IQ.Pre", "IQ.8", "IQ.Gain")
Data <- as.data.frame(Data)

# To make adjustments to figure region
par(mar= c(5, 4, 4, 2) + 0.1, omi=c(.5, .5, .5, 1))

plot(IQ.8 ~ IQ.Pre, data=Data, type="n", ylab="Posttest", xlab="Pretest", xlim=c(60, 120), ylim=c(80, 140))
axis.break(axis=1, style="zigzag") 
axis.break(axis=2, style="zigzag") 

points(Data[Data$Treatment==0,"IQ.8"] ~ Data[Data$Treatment==0,"IQ.Pre"], pch=1)
points(Data[Data$Treatment==1,"IQ.8"] ~ Data[Data$Treatment==1,"IQ.Pre"], pch=16)

Result <- summary(lm(IQ.8 ~ as.factor(Treatment) + IQ.Pre, data=Data))
b.0 <- coef(Result)[1,1]
b.Treatment <- coef(Result)[2,1]
b.W <- coef(Result)[3,1]

y.hat <- b.0 + b.Treatment*Data["Treatment"] + b.W*Data["IQ.Pre"]
colnames(y.hat) <- NULL

Data.Fitted <- cbind(Data, y.hat=y.hat)

Data.Fitted[Data.Fitted$Treatment==0,"y.hat"]
## [1]  83.76818  93.52574  97.32035 100.03078 100.57287 104.90956 110.87252
Data.Fitted[Data.Fitted$Treatment==1,"y.hat"]
## [1]  99.46515 100.00723 112.47523 114.64358 118.43819 121.14862 129.82201
abline(a=b.0+b.Treatment, b=b.W)

abline(a=b.0, b=b.W)

Result.R <- summary(lm(IQ.8 ~ IQ.Pre, data=Data))
abline(Result.R, lty=2, lwd=2)
## Warning in abline(Result.R, lty = 2, lwd = 2): only using the first two of
## 8 regression coefficients
# mtext(side=4, line=.5, adj=1, expression(paste(widehat(italic(Y))[1]==50, sep="")))
text(125.3, b.0 + b.Treatment*1 + b.W*123, expression(paste(widehat(italic(Y))["T"], sep="")), xpd=NA)

text(125.3, b.0 + b.Treatment*0 + b.W*123, expression(paste(widehat(italic(Y))["C"], sep="")), xpd=NA)

text(126.3, 61.6504 + 0.5097*123, expression(paste(widehat(italic(Y))["Restricted"], sep="")), xpd=NA)

legend(60, 135, legend=c("Treatment, T", "Control, C"), pch=c(16, 1), bty="n")

text(125.3, mean(Data[Data$Treatment==0,"IQ.8"]), expression(paste(italic(bar(Y))["C"], sep="")), xpd=NA)
text(125.3, mean(Data[Data$Treatment==1,"IQ.8"]), expression(paste(italic(bar(Y))["T"], sep="")), xpd=NA)
text(123.8, mean(Data$IQ.8), expression(paste(bar(italic(Y)), sep="")), xpd=NA)

abline(h=mean(Data[Data$Treatment==0,"IQ.8"]))
abline(h=mean(Data[Data$Treatment==1,"IQ.8"]))
abline(h=mean(Data$IQ.8))

The above plot has details that may not be necessary. Here we show just the ANCOVA output for the plot (i.e., this plot does not include the simpler ANOVA model lines [had we ignored the pretest]). Note too that the “Restricted” line could be dropped if desired.

# To make adjustments to figure region
par(mar= c(5, 4, 4, 2) + 0.1, omi=c(.5, .5, .5, 1))


plot(IQ.8 ~ IQ.Pre, data=Data, type="n", ylab="Posttest", xlab="Pretest", xlim=c(60, 120), ylim=c(80, 140), main="ANCOVA Model Visually")


axis.break(axis=1, style="zigzag") 
axis.break(axis=2, style="zigzag") 

points(Data[Data$Treatment==0,"IQ.8"] ~ Data[Data$Treatment==0,"IQ.Pre"], pch=1)
points(Data[Data$Treatment==1,"IQ.8"] ~ Data[Data$Treatment==1,"IQ.Pre"], pch=16)

Result <- summary(lm(IQ.8 ~ as.factor(Treatment) + IQ.Pre, data=Data))
b.0 <- coef(Result)[1,1]
b.Treatment <- coef(Result)[2,1]
b.W <- coef(Result)[3,1]

y.hat <- b.0 + b.Treatment*Data["Treatment"] + b.W*Data["IQ.Pre"]
colnames(y.hat) <- NULL

Data.Fitted <- cbind(Data, y.hat=y.hat)

Data.Fitted[Data.Fitted$Treatment==0,"y.hat"]
## [1]  83.76818  93.52574  97.32035 100.03078 100.57287 104.90956 110.87252
Data.Fitted[Data.Fitted$Treatment==1,"y.hat"]
## [1]  99.46515 100.00723 112.47523 114.64358 118.43819 121.14862 129.82201
abline(a=b.0+b.Treatment, b=b.W)

abline(a=b.0, b=b.W)

Result.R <- summary(lm(IQ.8 ~ IQ.Pre, data=Data))
abline(Result.R, lty=2, lwd=2)
## Warning in abline(Result.R, lty = 2, lwd = 2): only using the first two of
## 8 regression coefficients
# mtext(side=4, line=.5, adj=1, expression(paste(widehat(italic(Y))[1]==50, sep="")))
text(125.3, b.0 + b.Treatment*1 + b.W*123, expression(paste(widehat(italic(Y))["T"], sep="")), xpd=NA)

text(125.3, b.0 + b.Treatment*0 + b.W*123, expression(paste(widehat(italic(Y))["C"], sep="")), xpd=NA)

text(126.3, 61.6504 + 0.5097*123, expression(paste(widehat(italic(Y))["Restricted"], sep="")), xpd=NA)

legend(60, 135, legend=c("Treatment, T", "Control, C"), pch=c(16, 1), bty="n")