Designing Experiments and Analyzing Data: A Model Comparison Perspective (3rd edition) by Maxwell, Delaney, & Kelley

Information about the book is available at https://designingexperiments.com

R Code and Instructions to Accompany Chapter 4

These notes build on Chapter 1 and 3, where we discuss some operational aspects of R and detail the use of many functions and how to install packages. Here, we skip details that are discussed in the Chapters 1 and 3 R Code and Instructions.

Load the data for Table 4.1

library(AMCP)
data(chapter_4_table_1)
chapter_4_table_1

##    bloodpr cond
## 1        4    1
## 2       95    1
## 3       93    1
## 4      104    1
## 5       81    2
## 6       84    2
## 7       92    2
## 8      101    2
## 9       80    2
## 10     108    2
## 11      98    3
## 12      95    3
## 13      86    3
## 14      87    3
## 15      94    3
## 16      91    4
## 17      78    4
## 18      85    4
## 19      80    4
## 20      81    4

# Create the group name from group code. 
chapter_4_table_1$group <- ifelse(chapter_4_table_1$cond==1, "Drug Therapy", 
                             ifelse(chapter_4_table_1$cond==2, "Biofeedback", 
                                ifelse(chapter_4_table_1$cond==3, "Diet",
                                   ifelse(chapter_4_table_1$cond==4, "Combination", NA))))

Summarize using dplyr

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Summary.4.1 <- chapter_4_table_1 %>% 
group_by(group) %>%
summarize(
mean=mean(bloodpr),
median=median(bloodpr),
var=var(bloodpr),
sd=sd(bloodpr),
n=n()
) %>%
as.data.frame() 
# Note that this seemingly unimportant step is an important inclusion
# for how we use Summary.4.1 later. In particularl, the behavior of 
# extracted values differs when the object is a data frame as compared 
# to a dplyr table. 

Summary.4.1

##          group mean median      var        sd n
## 1  Biofeedback   91     88  132.000 11.489125 6
## 2  Combination   83     81   26.500  5.147815 5
## 3         Diet   92     94   27.500  5.244044 5
## 4 Drug Therapy   74     94 2200.667 46.911264 4

Obtain Mean Square Within, Method 1 (pooled within group variance; i.e., the weighted mean of the within group variances).

MS.Within <- sum(Summary.4.1$var*(Summary.4.1$n-1))/sum(Summary.4.1$n-1)

Obtain Mean Square Within, Method 2 (extracting objects from ANOVA)

ANOVA.Object <- aov(bloodpr~group, data=chapter_4_table_1)
ANOVA_4.1 <- anova(ANOVA.Object)
SS.B <- ANOVA_4.1$"Sum Sq"[1]
SS.W <- ANOVA_4.1$"Sum Sq"[2]
MS.B <- ANOVA_4.1$"Mean Sq"[1]
MS.W <- ANOVA_4.1$"Mean Sq"[2]

Here we compare two groups and show four approaches

Approach 1: Using basic definitional formulas by first calculating, and assigning to objects, the relevant descriptive statistics. This approach uses only base R functions (no add-on packages). Although this approach does not take advantage of some features of R and packages, it is useful to see the analysis in this way, as it corresponds with the book and allows one to more fully investigate aspects of data that might be missed using a more automated approach.

# For ease, separate the data into their corresponding groups
# (so that the summary statistics can be used). 
Group.1 <- chapter_4_table_1[chapter_4_table_1$cond==1, "bloodpr"]
Group.2 <- chapter_4_table_1[chapter_4_table_1$cond==2, "bloodpr"]
Group.3 <- chapter_4_table_1[chapter_4_table_1$cond==3, "bloodpr"]
Group.4 <- chapter_4_table_1[chapter_4_table_1$cond==4, "bloodpr"]

# Descriptive statistics. 
Y.bar1 <- mean(Group.1)
s2.1 <- var(Group.1)
n.1 <- length(Group.1)

Y.bar2 <- mean(Group.2)
s2.2 <- var(Group.2)
n.2 <- length(Group.2)

Y.bar3 <- mean(Group.3)
s2.3 <- var(Group.3)
n.3 <- length(Group.3)

Y.bar4 <- mean(Group.4)
s2.4 <- var(Group.4)
n.4 <- length(Group.4)

Y.bar.Star <- mean(c(Group.1, Group.2))

N <- nrow(chapter_4_table_1)
a <- length(unique(chapter_4_table_1$cond))

# Model comparisons.
df.R <- N-(a-1)
df.F <- N-a
E.F <- sum((Group.1 - Y.bar1)^2) + sum((Group.2 - Y.bar2)^2) + 
  sum((Group.3-Y.bar3)^2) + sum((Group.4-Y.bar4)^2)
E.R <- sum((Group.1 - Y.bar.Star)^2) + sum((Group.2 - Y.bar.Star)^2) + 
  sum((Group.3-Y.bar3)^2) + sum((Group.4-Y.bar4)^2)

F <- ((E.R-E.F)/(df.R-df.F))/(E.F/df.F)
F

## [1] 1.484033

1-pf(F, a, N-1)

## [1] 0.2464343

Approach 2: Using the computationally equivalent approach to the above (for a two-group comparison).

F <- ((n.1*n.2)*(Y.bar1 - Y.bar2)^2)/((n.1+n.2)*MS.W)

Approach 3: Obtaining the restricted model using group names and definitional equations based on the summary table previously created (using dplyr). Some of the values from above are repeated here for completeness. Note that the way objects are extracted from the summary table Summary.4.1 requires the conversion of the dplyr table to a data frame (see the last in the “Summarize using dplyr” code snippet from above)

Y.bar.star <- mean(chapter_4_table_1[chapter_4_table_1$group %in% c("Drug Therapy", "Biofeedback"), 
                                     "bloodpr"])

E.R <- sum((chapter_4_table_1[chapter_4_table_1$group %in% c("Drug Therapy", "Biofeedback"), 
       "bloodpr"] - Y.bar.star)^2) +
sum((chapter_4_table_1[chapter_4_table_1$group %in% "Diet", "bloodpr"] -
       Summary.4.1[Summary.4.1$group=="Diet", "mean"])^2) +
sum((chapter_4_table_1[chapter_4_table_1$group == "Combination", "bloodpr"] -
       Summary.4.1[Summary.4.1$group=="Combination", "mean"])^2)

# From the previously performed ANOVA. 
E.F <- SS.W

a <- length(unique(chapter_4_table_1$group))
N <- nrow(chapter_4_table_1)

df.R <- N-(a-1)
df.F <- N-a
F <- ((E.R-E.F)/(df.R-df.F))/(E.F/df.F)
F

## [1] 1.484033

1-pf(F, a, N-1)

## [1] 0.2464343

Approach 4: Using c-weights (and the mean square within which was previously obtained via the ANOVA).

c.weights <- c(1, -1, 0, 0)
Ybar <- c(Y.bar1, Y.bar2, Y.bar3, Y.bar4)
n <- c(n.1, n.2, n.3, n.4)
  
Psi <- sum(c.weights*Ybar)

F <- (Psi^2/sum(c.weights^2/n))/MS.W
F

## [1] 1.484033

1-pf(F, a, N-1)

## [1] 0.2464343

Now we perform a complex comparison.

Again using the c-weights approach, we perform a complex comparison comparing the average of the first three groups to the last group. Other than the different c-weights, the code is the same as above.

c.weights <- c(1/3, 1/3, 1/3, -1)
Ybar <- c(Y.bar1, Y.bar2, Y.bar3, Y.bar4)
n <- c(n.1, n.2, n.3, n.4)
  
Psi <- sum(c.weights*Ybar)
Psi

## [1] 2.666667

F <- (Psi^2/sum(c.weights^2/n))/MS.W
F

## [1] 0.05666276

1-pf(F, a, N-1)

## [1] 0.993517

Here we do not assume homogeneity of variance for a contrast. We repeat some of the code from above for completeness. Note that the way we write the code, which matches how it is displayed in the book, is due to R’s “vectorized arithmetic” (and thus making the equations in the R code mimic those in the book).

Ybar <- c(Y.bar1, Y.bar2, Y.bar3, Y.bar4)
s2 <- c(s2.1, s2.2, s2.3, s2.4)
n <- c(n.1, n.2, n.3, n.4)

# Contrast 1
c1 <- c(1, -1, 0, 0)
Psi1 <- sum(Ybar*c1)
var.Psi1 <- sum((c1^2/n)*s2)
F1 <- Psi1^2/var.Psi1
df1 <- sum(c1^2*s2/n)^2/sum((c1^2*s2/n)^2/(n-1))

1-pf(F1, 1, df1)

## [1] 0.5250104

# Contrast 2
# Notice we use fractional c-weights, whereas the book uses whole numbers
c2 <- c(1/3, 1/3, 1/3, -1) 
Psi2 <- sum(Ybar*c2)
var.Psi2 <- sum((c2^2/n)*s2)
F2 <- Psi2^2/var.Psi2
df2 <- sum(c2^2*s2/n)^2/sum((c2^2*s2/n)^2/(n-1))

1-pf(F2, 1, df2)

## [1] 0.7656416

Yet another approach to forming confidence intervals is with the use of the MBESS package (which assumes homogeneity of variance) and its ci.c() function.

library(MBESS)

ANOVA.Object <-aov(bloodpr ~ as.factor(cond), data=chapter_4_table_1)
ANOVA_4.1 <- anova(ANOVA.Object)

MS.W <- ANOVA_4.1$"Mean Sq"[2]

N <- nrow(chapter_4_table_1)
a <- length(unique(chapter_4_table_1$cond))


Y.bar1 <- mean(Group.1)
s2.1 <- var(Group.1)
n.1 <- length(Group.1)

Y.bar2 <- mean(Group.2)
s2.2 <- var(Group.2)
n.2 <- length(Group.2)
c(Y.bar1, Y.bar2, Y.bar3)

## [1] 74 91 92

s2.3

## [1] 27.5

n.3

## [1] 5

ci.c(means=c(Y.bar1, Y.bar2, Y.bar3, Y.bar4), c.weights=c(1, -1, 0, 0),
         n=c(n.1, n.2, n.3, n.4), N=N, s.anova=sqrt(MS.W))

## $Lower.Conf.Limit.Contrast
## [1] -46.58309
## 
## $Contrast
## [1] -17
## 
## $Upper.Conf.Limit.Contrast
## [1] 12.58309

ci.c(means=c(Y.bar1, Y.bar2, Y.bar3, Y.bar4), c.weights=c(1/3, 1/3, 1/3, -1),
         n=c(n.1, n.2, n.3, n.4), N=N, s.anova=sqrt(MS.W))

## $Lower.Conf.Limit.Contrast
## [1] -21.08184
## 
## $Contrast
## [1] 2.666667
## 
## $Upper.Conf.Limit.Contrast
## [1] 26.41517

The ci.c() function above is for contrasts, in particular, unstandardized contrasts. The ci.SC function is for standardized contrasts. It works in the same way, though the confidence intervals are based on the noncentral t-distribution.

ci.sc(means=c(Y.bar1, Y.bar2, Y.bar3, Y.bar4), c.weights=c(1, -1, 0, 0),
         n=c(n.1, n.2, n.3, n.4), N=N, s.anova=sqrt(MS.W))

## $Lower.Conf.Limit.Standardized.Contrast
## [1] -2.068384
## 
## $Standardized.contrast
## [1] -0.7863505
## 
## $Upper.Conf.Limit.Standardized.Contrast
## [1] 0.519003

ci.sc(means=c(Y.bar1, Y.bar2, Y.bar3, Y.bar4), c.weights=c(1/3, 1/3, 1/3, -1),
         n=c(n.1, n.2, n.3, n.4), N=N, s.anova=sqrt(MS.W))

## $Lower.Conf.Limit.Standardized.Contrast
## [1] -0.8950724
## 
## $Standardized.contrast
## [1] 0.1233491
## 
## $Upper.Conf.Limit.Standardized.Contrast
## [1] 1.137955

An example calculation of R-squared alerting is provided below.

ANOVA.Object <-aov(bloodpr ~ as.factor(cond), data=chapter_4_table_1)
ANOVA_4.1 <- anova(ANOVA.Object)

SS.B <- ANOVA_4.1$"Sum Sq"[1]

# For ease, separate the data into their corresponding groups. 
Group.1 <- chapter_4_table_1[chapter_4_table_1$cond==1, "bloodpr"]
Group.2 <- chapter_4_table_1[chapter_4_table_1$cond==2, "bloodpr"]
Group.3 <- chapter_4_table_1[chapter_4_table_1$cond==3, "bloodpr"]
Group.4 <- chapter_4_table_1[chapter_4_table_1$cond==4, "bloodpr"]

# Descriptive statistics. 
Y.bar1 <- mean(Group.1)
Y.bar2 <- mean(Group.2)
Y.bar3 <- mean(Group.3)
Y.bar4 <- mean(Group.4)

Y.bars <- c(Y.bar1, Y.bar2, Y.bar3, Y.bar4)
n <- c(n.1, n.2, n.3, n.4)

c.weights.1 <- c(1, -1, 0, 0)
Psi.1 <- c.weights.1%*%Y.bars
SS.Psi.1 <- Psi.1^2/sum(c.weights.1^2/n)  
SS.Psi.1

##       [,1]
## [1,] 693.6

c.weights.2 <- c(1/3, 1/3, 1/3, -1)
Psi.2 <- c.weights.2%*%Y.bars
SS.Psi.2 <- Psi.2^2/sum(c.weights.2^2/n)  
SS.Psi.2

##          [,1]
## [1,] 26.48276

R2.alerting.1 <- SS.Psi.1/SS.B
R2.alerting.2 <- SS.Psi.2/SS.B

R2.alerting.1

##           [,1]
## [1,] 0.7296828

R2.alerting.2

##            [,1]
## [1,] 0.02786046

Above was one approach to effect sizes for contrasts. Below we calculate the R-squared effect size. This is similar to the R-squared alerting, but here the denominator is the Sum of Squares Total, not the Sum of Squares Between.

# First, recall that an ANOVA was already performed and is contained in the following object. 
ANOVA_4.1

## Analysis of Variance Table
## 
## Response: bloodpr
##                 Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(cond)  3  950.5  316.85  0.6779 0.5782
## Residuals       16 7478.0  467.38

# The sum of squares total is the sum of the Sum of Squares Between and 
# the Sum of Squares Within.
SS.B <- ANOVA_4.1$"Sum Sq"[1]
SS.W <- ANOVA_4.1$"Sum Sq"[2]
SS.T <- SS.B + SS.W

R2.EffectSize.1 <- SS.Psi.1/SS.T
R2.EffectSize.2 <- SS.Psi.2/SS.T

R2.EffectSize.1

##            [,1]
## [1,] 0.08229173

R2.EffectSize.2

##            [,1]
## [1,] 0.00314203

Using the Sums of Squares for the Contrasts and Sum of Squares Within (from above), we form R-squared Contrast.

R2.Contrast.1 <- SS.Psi.1/(SS.Psi.1 + SS.W)
R2.Contrast.2 <- SS.Psi.2/(SS.Psi.2 + SS.W)

R2.Contrast.1

##            [,1]
## [1,] 0.08487934

R2.Contrast.2

##             [,1]
## [1,] 0.003528925

Here we do an ANOVA but only with groups 1 and 2. Then, we calculate the R2.Contrast for the 1st contrast (i.e., the pairwise difference between the first two groups.). Note that the Sum of Squares for the contrast does not change with the reduction in groups. However, the Sum of Squares Within does. We use “_12" augmented to the objects to denote that the value concerns only groups 1 and 2. As the books states, “our general preference is to base this measure [\(R^2_{Contrast}\)] only on the groups actually involved in the comparison.”

ANOVA.Object_12 <-aov(bloodpr ~ as.factor(cond), data=chapter_4_table_1[chapter_4_table_1$cond 
                                                                        %in% c(1,2), ])
ANOVA_4.1_12 <- anova(ANOVA.Object_12)

SS.W_2 <- ANOVA_4.1_12$"Sum Sq"[2]

R2.Contrast.1_12 <- SS.Psi.1/(SS.Psi.1 + SS.W_2)
R2.Contrast.1_12

##            [,1]
## [1,] 0.08718387

Test for orthogonality. Here, we define a matrix in which the rows represent contrasts of interest. This is for a=3 and 2 contrasts.

C.Matrix <- rbind(Psi1=c(1, -1, 0), Psi2=c(1, 0, -1))

# First, verify that sum of the c-weights is zero (it is below, continue). 
apply(C.Matrix, MARGIN=1, FUN=sum)

## Psi1 Psi2 
##    0    0

# Check if the sum of the products of c-weights is zero (it is not below; 
# these are nonorthogonal contrasts).
sum(C.Matrix["Psi1", ] * C.Matrix["Psi2", ])

## [1] 1

Here we check for orthogonality of this (new) C.Matrix2 object. This is for a=3 and 2 contrasts.

C.Matrix2 <- rbind(Psi1=c(1, -1, 0), Psi2=c(1/2, 1/2, -1))

# First, verify that sum of the c-weights is zero. 
apply(C.Matrix2, MARGIN=1, FUN=sum)

## Psi1 Psi2 
##    0    0

# Check if the sum of the products of c-weights is zero (it is below; these are orthogonal contrasts).
sum(C.Matrix2["Psi1", ] * C.Matrix2["Psi2", ])

## [1] 0