days <- c(240, 206, 217, 225, 110, 118, 103, 95, 56, 60, 68, 58, 71, 53, 68,
57, 47, 52, 31, 49, 37, 33, 40, 45)
type <- factor(c("A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B"))
years <- factor(c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
6, 6, 6, 6))
years2 <- factor(c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2,
2, 3, 3, 3, 3)) # This is the way the data is coded for problems in the book
Box plots for each teacher can be appropiate, as they are the experimental unit.
boxplot(days ~ type)
boxplot(days ~ years)
boxplot(days ~ type:years2)
library(lattice)
dotplot(days ~ years2 | type)
What if we do it correctly?
res1 <- lm(days ~ type + type/years)
anova(res1)
## Analysis of Variance Table
##
## Response: days
## Df Sum Sq Mean Sq F value Pr(>F)
## type 1 39447 39447 458 3.0e-14 ***
## type:years 4 56577 14144 164 6.7e-14 ***
## Residuals 18 1550 86
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
res2 <- lm(days ~ type + type/years2)
anova(res2)
## Analysis of Variance Table
##
## Response: days
## Df Sum Sq Mean Sq F value Pr(>F)
## type 1 39447 39447 458 3.0e-14 ***
## type:years2 4 56577 14144 164 6.7e-14 ***
## Residuals 18 1550 86
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Which schools are different?
TukeyHSD(aov(res1), "type")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = res1)
##
## $type
## diff lwr upr p adj
## B-A -81.08 -89.04 -73.12 0
TukeyHSD(aov(res2), "type")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = res2)
##
## $type
## diff lwr upr p adj
## B-A -81.08 -89.04 -73.12 0
Do we want all the teacher comparisons? Probably not.
TukeyHSD(aov(res1), "type:years") #Yikes
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = res1)
##
## $`type:years`
## diff lwr upr p adj
## B:1-A:1 NA NA NA NA
## A:2-A:1 -115.50 -139.94 -91.0629 0.0000
## B:2-A:1 NA NA NA NA
## A:3-A:1 -161.50 -185.94 -137.0629 0.0000
## B:3-A:1 NA NA NA NA
## A:4-A:1 NA NA NA NA
## B:4-A:1 -159.75 -184.19 -135.3129 0.0000
## A:5-A:1 NA NA NA NA
## B:5-A:1 -177.25 -201.69 -152.8129 0.0000
## A:6-A:1 NA NA NA NA
## B:6-A:1 -183.25 -207.69 -158.8129 0.0000
## A:2-B:1 NA NA NA NA
## B:2-B:1 NA NA NA NA
## A:3-B:1 NA NA NA NA
## B:3-B:1 NA NA NA NA
## A:4-B:1 NA NA NA NA
## B:4-B:1 NA NA NA NA
## A:5-B:1 NA NA NA NA
## B:5-B:1 NA NA NA NA
## A:6-B:1 NA NA NA NA
## B:6-B:1 NA NA NA NA
## B:2-A:2 NA NA NA NA
## A:3-A:2 -46.00 -70.44 -21.5629 0.0001
## B:3-A:2 NA NA NA NA
## A:4-A:2 NA NA NA NA
## B:4-A:2 -44.25 -68.69 -19.8129 0.0001
## A:5-A:2 NA NA NA NA
## B:5-A:2 -61.75 -86.19 -37.3129 0.0000
## A:6-A:2 NA NA NA NA
## B:6-A:2 -67.75 -92.19 -43.3129 0.0000
## A:3-B:2 NA NA NA NA
## B:3-B:2 NA NA NA NA
## A:4-B:2 NA NA NA NA
## B:4-B:2 NA NA NA NA
## A:5-B:2 NA NA NA NA
## B:5-B:2 NA NA NA NA
## A:6-B:2 NA NA NA NA
## B:6-B:2 NA NA NA NA
## B:3-A:3 NA NA NA NA
## A:4-A:3 NA NA NA NA
## B:4-A:3 1.75 -22.69 26.1871 1.0000
## A:5-A:3 NA NA NA NA
## B:5-A:3 -15.75 -40.19 8.6871 0.4513
## A:6-A:3 NA NA NA NA
## B:6-A:3 -21.75 -46.19 2.6871 0.1078
## A:4-B:3 NA NA NA NA
## B:4-B:3 NA NA NA NA
## A:5-B:3 NA NA NA NA
## B:5-B:3 NA NA NA NA
## A:6-B:3 NA NA NA NA
## B:6-B:3 NA NA NA NA
## B:4-A:4 NA NA NA NA
## A:5-A:4 NA NA NA NA
## B:5-A:4 NA NA NA NA
## A:6-A:4 NA NA NA NA
## B:6-A:4 NA NA NA NA
## A:5-B:4 NA NA NA NA
## B:5-B:4 -17.50 -41.94 6.9371 0.3143
## A:6-B:4 NA NA NA NA
## B:6-B:4 -23.50 -47.94 0.9371 0.0657
## B:5-A:5 NA NA NA NA
## A:6-A:5 NA NA NA NA
## B:6-A:5 NA NA NA NA
## A:6-B:5 NA NA NA NA
## B:6-B:5 -6.00 -30.44 18.4371 0.9979
## B:6-A:6 NA NA NA NA
TukeyHSD(aov(res2), "school:years2")
## Error: 'which' specified no factors
contrast(res2, list(school = "A", teacher = "1"), list(school = "A", teacher = "2")) #Bummer
## Error: could not find function "contrast"
library(lsmeans)
## Warning: package 'lsmeans' was built under R version 2.15.3
lsmeans(res2, pairwise ~ type:years2)
## $`type:years2 lsmeans`
## type years2 lsmean SE df lower.CL upper.CL
## A 1 222.00 4.64 18 212.25 231.75
## B 1 62.25 4.64 18 52.50 72.00
## A 2 106.50 4.64 18 96.75 116.25
## B 2 44.75 4.64 18 35.00 54.50
## A 3 60.50 4.64 18 50.75 70.25
## B 3 38.75 4.64 18 29.00 48.50
##
## $`type:years2 pairwise differences`
## estimate SE df t.ratio p.value
## A, 1 - B, 1 159.75 6.562 18 24.3440 0.00000
## A, 1 - A, 2 115.50 6.562 18 17.6008 0.00000
## A, 1 - B, 2 177.25 6.562 18 27.0108 0.00000
## A, 1 - A, 3 161.50 6.562 18 24.6106 0.00000
## A, 1 - B, 3 183.25 6.562 18 27.9251 0.00000
## B, 1 - A, 2 -44.25 6.562 18 -6.7432 0.00003
## B, 1 - B, 2 17.50 6.562 18 2.6668 0.13141
## B, 1 - A, 3 1.75 6.562 18 0.2667 0.99978
## B, 1 - B, 3 23.50 6.562 18 3.5811 0.02212
## A, 2 - B, 2 61.75 6.562 18 9.4100 0.00000
## A, 2 - A, 3 46.00 6.562 18 7.0098 0.00002
## A, 2 - B, 3 67.75 6.562 18 10.3243 0.00000
## B, 2 - A, 3 -15.75 6.562 18 -2.4001 0.20777
## B, 2 - B, 3 6.00 6.562 18 0.9143 0.93774
## A, 3 - B, 3 21.75 6.562 18 3.3144 0.03809
## p values are adjusted using the tukey method for 6 means
Using lsmeans gives the same results as TukeyHSD. However, you can get p-values that are not adjusted.
lsmeans(res2, pairwise ~ type:years2, adjust = "none")
## $`type:years2 lsmeans`
## type years2 lsmean SE df lower.CL upper.CL
## A 1 222.00 4.64 18 212.25 231.75
## B 1 62.25 4.64 18 52.50 72.00
## A 2 106.50 4.64 18 96.75 116.25
## B 2 44.75 4.64 18 35.00 54.50
## A 3 60.50 4.64 18 50.75 70.25
## B 3 38.75 4.64 18 29.00 48.50
##
## $`type:years2 pairwise differences`
## estimate SE df t.ratio p.value
## A, 1 - B, 1 159.75 6.562 18 24.3440 0.00000
## A, 1 - A, 2 115.50 6.562 18 17.6008 0.00000
## A, 1 - B, 2 177.25 6.562 18 27.0108 0.00000
## A, 1 - A, 3 161.50 6.562 18 24.6106 0.00000
## A, 1 - B, 3 183.25 6.562 18 27.9251 0.00000
## B, 1 - A, 2 -44.25 6.562 18 -6.7432 0.00000
## B, 1 - B, 2 17.50 6.562 18 2.6668 0.01572
## B, 1 - A, 3 1.75 6.562 18 0.2667 0.79275
## B, 1 - B, 3 23.50 6.562 18 3.5811 0.00214
## A, 2 - B, 2 61.75 6.562 18 9.4100 0.00000
## A, 2 - A, 3 46.00 6.562 18 7.0098 0.00000
## A, 2 - B, 3 67.75 6.562 18 10.3243 0.00000
## B, 2 - A, 3 -15.75 6.562 18 -2.4001 0.02742
## B, 2 - B, 3 6.00 6.562 18 0.9143 0.37263
## A, 3 - B, 3 21.75 6.562 18 3.3144 0.00386
## p values are not adjusted
Since we are only intrested in the three of the pairwise comparisons, the Bonferonni adjusted p-value is \( \frac{\alpha}{g} \), where g is the number of comparisons. Since g=3, any adjusted p-value less than .017 is significant. Therefore they are all significant. In the end, you can use the TukeyHSD and just look at the interesting contrasts.