The model

model1 <- lm(breaks ~ tension*wool, data = warpbreaks)
# get the source table
car::Anova(model1, type=3)
## Anova Table (Type III tests)
## 
## Response: breaks
##               Sum Sq Df  F value    Pr(>F)    
## (Intercept)  17866.8  1 149.2757 2.426e-16 ***
## tension       2468.5  2  10.3121 0.0001881 ***
## wool          1200.5  1  10.0301 0.0026768 ** 
## tension:wool  1002.8  2   4.1891 0.0210442 *  
## Residuals     5745.1 48                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
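
Before pulling out marginal means, it's worth confirming that the design is balanced, since that's what makes the estimated marginal means below line up with the plain group means. A quick check (nothing lsmeans-specific here):

# warpbreaks is a balanced 3 x 2 design: 9 observations per cell
with(warpbreaks, table(tension, wool))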

Get estimated marginal means

An estimated marginal mean is the predicted DV value for each group you’re comparing. When you run an ANOVA (one-way or factorial), the predicted DV values for each group are just the group means, so lsmeans() will give you the same means you would get using psych::describeBy(), for example. When you have a more complex model (like an ANCOVA), the estimated marginal means may differ from the regular ol’ group means because they control for any other predictors in the model.

For right now, even if you’re just doing plain ANOVAs, it’s handy to use lsmeans() because it produces a special lsmeans object that we can then send to the super awesome contrast() function from the lsmeans package.
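
If you want to double-check that against the raw means, here’s a minimal sketch using base R’s aggregate() in place of psych::describeBy(); for a balanced design like warpbreaks, these should match the lsmeans() output below.

# raw means of breaks at each level of tension, collapsing across wool
aggregate(breaks ~ tension, data = warpbreaks, FUN = mean)
# raw cell means for each tension x wool combination
aggregate(breaks ~ tension + wool, data = warpbreaks, FUN = mean)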

library(lsmeans)
## Loading required package: estimability
# means of tension collapsing across wool
means.tension <- lsmeans(model1, specs = "tension")
## NOTE: Results may be misleading due to involvement in interactions
means.tension
##  tension   lsmean      SE df lower.CL upper.CL
##  L       36.38889 2.57865 48 31.20417 41.57361
##  M       26.38889 2.57865 48 21.20417 31.57361
##  H       21.66667 2.57865 48 16.48194 26.85139
## 
## Results are averaged over the levels of: wool 
## Confidence level used: 0.95
# means of wool collapsing across tension
means.wool  <- lsmeans(model1, specs = "wool")
## NOTE: Results may be misleading due to involvement in interactions
means.wool
##  wool   lsmean       SE df lower.CL upper.CL
##  A    31.03704 2.105459 48 26.80373 35.27035
##  B    25.25926 2.105459 48 21.02595 29.49257
## 
## Results are averaged over the levels of: tension 
## Confidence level used: 0.95
# cell means
means.int <- lsmeans(model1, specs = c("tension", "wool"))
means.int
##  tension wool   lsmean       SE df lower.CL upper.CL
##  L       A    44.55556 3.646761 48 37.22325 51.88786
##  M       A    24.00000 3.646761 48 16.66769 31.33231
##  H       A    24.55556 3.646761 48 17.22325 31.88786
##  L       B    28.22222 3.646761 48 20.88992 35.55453
##  M       B    28.77778 3.646761 48 21.44547 36.11008
##  H       B    18.77778 3.646761 48 11.44547 26.11008
## 
## Confidence level used: 0.95

Comparing means (i.e. contrasts)

Note that the contrast() function here is from the lsmeans package. It’s very different from the contrasts() function in base R’s stats package.

Check out this excellent description of the lsmeans package: https://cran.r-project.org/web/packages/lsmeans/vignettes/using-lsmeans.pdf
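
One handy shortcut: you can ask lsmeans() for the means and a set of comparisons in a single call by giving specs a formula. Here’s a sketch, assuming you want all pairwise comparisons of tension (the contrast() calls below give you finer control).

# means of tension plus all pairwise comparisons, in one step
lsmeans(model1, pairwise ~ tension)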

Specify your own contrasts

# contrasts on the main effect of tension
# compare Low to Medium, and the combination of Low and Medium to High
levels(warpbreaks$tension)
## [1] "L" "M" "H"
contrast(means.tension, list(LvM = c(-1,1,0), LMvH=c(.5,.5, -1)))
##  contrast   estimate       SE df t.ratio p.value
##  LvM      -10.000000 3.646761 48  -2.742  0.0086
##  LMvH       9.722222 3.158188 48   3.078  0.0034
## 
## Results are averaged over the levels of: wool
# contrast on the main effect of wool (not actually necessary since there's only 2 levels)
contrast(means.wool, list(AvB = c(-1,1)))
##  contrast  estimate       SE df t.ratio p.value
##  AvB      -5.777778 2.977568 48   -1.94  0.0582
## 
## Results are averaged over the levels of: tension
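
When you write your own weights, it’s a good idea to confirm that each contrast sums to zero, and that the set is orthogonal (at least for a balanced design, the sum of the products of the weights should be zero too). A quick sketch:

# each set of weights should sum to zero
sum(c(-1, 1, 0))
sum(c(.5, .5, -1))
# the two tension contrasts are orthogonal: their weight products sum to zero
sum(c(-1, 1, 0) * c(.5, .5, -1))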

Contrasts on simple effects

# contrasts on simple effect of tension at wool=A
contrast(means.int, list(LvMforA = c(1,-1,0,0,0,0), LMvHforA=c(.5,.5,-1,0,0,0)))
##  contrast  estimate       SE df t.ratio p.value
##  LvMforA  20.555556 5.157299 48   3.986  0.0002
##  LMvHforA  9.722222 4.466352 48   2.177  0.0344
# contrasts on simple effect of tension at wool=B
contrast(means.int, list(LvMforB = c(0,0,0,1,-1,0), LMvHforB=c(0,0,0,.5,.5,-1)))
##  contrast   estimate       SE df t.ratio p.value
##  LvMforB  -0.5555556 5.157299 48  -0.108  0.9147
##  LMvHforB  9.7222222 4.466352 48   2.177  0.0344
# you can apply contrasts for simple effects quickly by using the "by" argument
contrast(means.int, list(LvM = c(1,-1,0), LMvH=c(.5,.5,-1)), by="wool")
## wool = A:
##  contrast   estimate       SE df t.ratio p.value
##  LvM      20.5555556 5.157299 48   3.986  0.0002
##  LMvH      9.7222222 4.466352 48   2.177  0.0344
## 
## wool = B:
##  contrast   estimate       SE df t.ratio p.value
##  LvM      -0.5555556 5.157299 48  -0.108  0.9147
##  LMvH      9.7222222 4.466352 48   2.177  0.0344
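
The same trick works in the other direction. Here’s a sketch of the simple effect of wool at each level of tension; the weights follow the order of the wool levels (A, then B) within each tension group.

# simple effect of wool (A vs. B) at each level of tension
contrast(means.int, list(AvB = c(1, -1)), by = "tension")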

Use standard contrasts

# use built-in polynomial contrasts on tension (linear and quadratic)
contrast(means.tension, method = "poly")
##  contrast    estimate       SE df t.ratio p.value
##  linear    -14.722222 3.646761 48  -4.037  0.0002
##  quadratic   5.277778 6.316376 48   0.836  0.4075
## 
## Results are averaged over the levels of: wool
# wanna see the contrast weights it uses?
poly.lsmc(1:3)
##   linear quadratic
## 1     -1         1
## 2      0        -2
## 3      1         1
poly.lsmc(1:4) 
##   linear quadratic cubic
## 1     -3         1    -1
## 2     -1        -1     3
## 3      1        -1    -3
## 4      3         1     1
poly.lsmc(1:5)
##   linear quadratic cubic quartic
## 1     -2         2    -1       1
## 2     -1        -1     2      -4
## 3      0        -2     0       6
## 4      1        -1    -2      -4
## 5      2         2     1       1
### Setting up a custom contrast function (Helmert contrasts, in this case)
helmert.lsmc <- function(levs, ...) {
  M <- as.data.frame(contr.helmert(levs))
  names(M) <- paste(levs[-1],"vs earlier")
  attr(M, "desc") <- "Helmert contrasts"
  M
}
contrast(means.tension, method = "helmert")
##  contrast      estimate       SE df t.ratio p.value
##  M vs earlier -10.00000 3.646761 48  -2.742  0.0086
##  H vs earlier -19.44444 6.316376 48  -3.078  0.0034
## 
## Results are averaged over the levels of: wool
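
To see the weights this custom function produces, you can call it on the factor levels directly. For three levels, contr.helmert() gives weights of -1, 1, 0 and -1, -1, 2, which is why the estimates above come out to M - L and 2H - (L + M).

# check the Helmert weights for the three tension levels
helmert.lsmc(levels(warpbreaks$tension))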
# you can apply contrasts for simple effects quickly by using the "by" argument
contrast(means.int, method = "poly", by="wool")
## wool = A:
##  contrast    estimate       SE df t.ratio p.value
##  linear    -20.000000 5.157299 48  -3.878  0.0003
##  quadratic  21.111111 8.932705 48   2.363  0.0222
## 
## wool = B:
##  contrast    estimate       SE df t.ratio p.value
##  linear     -9.444444 5.157299 48  -1.831  0.0733
##  quadratic -10.555556 8.932705 48  -1.182  0.2432

Important note about interpreting contrast estimates: The contrast estimate depends on both the difference between the groups being compared and the weights of the contrast itself. You can test the same basic contrast with different weights. For example, you can look for a linear trend in 3 groups with c(1, 0, -1) or c(.5, 0, -.5); they’ll do the same thing (compare the first group to the last group) and return exactly the same t-test and p-value. The contrast estimate itself will differ, though, which can lead to discrepancies in how you interpret the contrast estimate across different methods of defining contrasts.

For example, consider two different ways of testing orthogonal polynomial contrasts, using the weights in lsmeans and the weights in stats.

# Here are the weights used for polynomial contrasts in base R:
contr.poly(3)
##                 .L         .Q
## [1,] -7.071068e-01  0.4082483
## [2,] -7.850462e-17 -0.8164966
## [3,]  7.071068e-01  0.4082483
# And here are the weights for the same contrasts in lsmeans:
poly.lsmc(1:3)
##   linear quadratic
## 1     -1         1
## 2      0        -2
## 3      1         1
### Setting up a custom contrast function (the stats polynomial contrasts, in this case)
polyalt.lsmc <- function(levs, ...) {
  M <- as.data.frame(contr.poly(levs))
  M
}
contrast(means.tension, method = "polyalt") # the stats weights
##  contrast   estimate      SE df t.ratio p.value
##  .L       -10.410183 2.57865 48  -4.037  0.0002
##  .Q         2.154644 2.57865 48   0.836  0.4075
## 
## Results are averaged over the levels of: wool
contrast(means.tension, method = "poly") # the lsmeans weights
##  contrast    estimate       SE df t.ratio p.value
##  linear    -14.722222 3.646761 48  -4.037  0.0002
##  quadratic   5.277778 6.316376 48   0.836  0.4075
## 
## Results are averaged over the levels of: wool

Note that both of these are totally acceptable orthogonal weights for polynomial contrasts. The t-tests and p-values are exactly the same, because they’re testing the exact same effect.
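
In fact, for 3 levels the base R weights are just the lsmeans weights rescaled to unit length. A quick sketch to check that:

# rescaling each column of the lsmeans weights to unit length recovers contr.poly(3) (up to rounding)
apply(poly.lsmc(1:3), 2, function(x) x / sqrt(sum(x^2)))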

In general, the most interesting statistic to report is the literal difference between groups. Sometimes that will be the same as your contrast estimate and sometimes it won’t (the contrast estimate will be the difference between groups times some constant). The safest bet is to always check the group means themselves, which you can get from lsmeans(), so you know exactly what the difference between the groups is for the contrast you’re reporting.

What are the differences between group means in this case?

means.tension
##  tension   lsmean      SE df lower.CL upper.CL
##  L       36.38889 2.57865 48 31.20417 41.57361
##  M       26.38889 2.57865 48 21.20417 31.57361
##  H       21.66667 2.57865 48 16.48194 26.85139
## 
## Results are averaged over the levels of: wool 
## Confidence level used: 0.95
# linear contrast: H vs. L
21.66667 - 36.38889
## [1] -14.72222
# quadratic contrast: The mean of H and L vs. M
mean(c(21.66667, 36.38889)) - 26.38889
## [1] 2.63889
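
If you’d rather have the contrast estimates come out as those literal differences in the first place, you can rescale the weights so the positive weights sum to +1 and the negative weights sum to -1. A sketch (the t-tests and p-values are identical to the polynomial contrasts above; only the scale of the estimates changes):

# same tests as the polynomial contrasts, but the estimates are plain mean differences
contrast(means.tension, list(HvL = c(-1, 0, 1), HLvM = c(.5, -1, .5)))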

Post-hoc adjustments for pairwise comparisons

# post-hoc comparisons of all means (using Tukey's HSD)
pairs(means.int, adjust = "tukey")
##  contrast    estimate       SE df t.ratio p.value
##  L,A - M,A 20.5555556 5.157299 48   3.986  0.0030
##  L,A - H,A 20.0000000 5.157299 48   3.878  0.0041
##  L,A - L,B 16.3333333 5.157299 48   3.167  0.0302
##  L,A - M,B 15.7777778 5.157299 48   3.059  0.0398
##  L,A - H,B 25.7777778 5.157299 48   4.998  0.0001
##  M,A - H,A -0.5555556 5.157299 48  -0.108  1.0000
##  M,A - L,B -4.2222222 5.157299 48  -0.819  0.9627
##  M,A - M,B -4.7777778 5.157299 48  -0.926  0.9377
##  M,A - H,B  5.2222222 5.157299 48   1.013  0.9115
##  H,A - L,B -3.6666667 5.157299 48  -0.711  0.9797
##  H,A - M,B -4.2222222 5.157299 48  -0.819  0.9627
##  H,A - H,B  5.7777778 5.157299 48   1.120  0.8706
##  L,B - M,B -0.5555556 5.157299 48  -0.108  1.0000
##  L,B - H,B  9.4444444 5.157299 48   1.831  0.4561
##  M,B - H,B 10.0000000 5.157299 48   1.939  0.3919
## 
## P value adjustment: tukey method for comparing a family of 6 estimates
# bonferroni adjustment
pairs(means.int, adjust = "bon")
##  contrast    estimate       SE df t.ratio p.value
##  L,A - M,A 20.5555556 5.157299 48   3.986  0.0034
##  L,A - H,A 20.0000000 5.157299 48   3.878  0.0048
##  L,A - L,B 16.3333333 5.157299 48   3.167  0.0402
##  L,A - M,B 15.7777778 5.157299 48   3.059  0.0544
##  L,A - H,B 25.7777778 5.157299 48   4.998  0.0001
##  M,A - H,A -0.5555556 5.157299 48  -0.108  1.0000
##  M,A - L,B -4.2222222 5.157299 48  -0.819  1.0000
##  M,A - M,B -4.7777778 5.157299 48  -0.926  1.0000
##  M,A - H,B  5.2222222 5.157299 48   1.013  1.0000
##  H,A - L,B -3.6666667 5.157299 48  -0.711  1.0000
##  H,A - M,B -4.2222222 5.157299 48  -0.819  1.0000
##  H,A - H,B  5.7777778 5.157299 48   1.120  1.0000
##  L,B - M,B -0.5555556 5.157299 48  -0.108  1.0000
##  L,B - H,B  9.4444444 5.157299 48   1.831  1.0000
##  M,B - H,B 10.0000000 5.157299 48   1.939  0.8759
## 
## P value adjustment: bonferroni method for 15 tests
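
The same adjustments work for pairwise comparisons within a main effect, too. A quick sketch on the tension means, assuming adjust also accepts the usual p.adjust-style methods like "holm":

# pairwise comparisons of the three tension means, collapsing across wool
pairs(means.tension, adjust = "tukey")
pairs(means.tension, adjust = "holm")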