library(lmSupport)
library(ggplot2)
source('http://psych.colorado.edu/~jclab/R/mcSummaryLm.R')
?ToothGrowth # this displays info about the dataset in the "Viewer" pane
# for Vitamin C Supplementation
ToothGrowth$C_supp <- ifelse(ToothGrowth$supp == "OJ", 0.5, -0.5)
# for dosage
ToothGrowth$d_lin <- (-1/2)*(ToothGrowth$dose == 0.5) + (0)*(ToothGrowth$dose == 1) + (1/2)*(ToothGrowth$dose == 2)
ToothGrowth$d_quad <- (-1/3)*(ToothGrowth$dose == 0.5) + (2/3)* (ToothGrowth$dose == 1) + (-1/3)*(ToothGrowth$dose == 2)
Contrast codes allow us to test for effects collapsing across factors. We cannot obtain simple effects using contrast codes.
# C Supplement
ToothGrowth$C_supp <- NA
ToothGrowth$C_supp[ToothGrowth$supp == "OJ"] <- 1/2
ToothGrowth$C_supp[ToothGrowth$supp == "VC"] <- -1/2
# Dosage
ToothGrowth$d_lin <- NA
ToothGrowth$d_lin[ToothGrowth$dose == 0.5] <- -1/2
ToothGrowth$d_lin[ToothGrowth$dose == 1] <- 0
ToothGrowth$d_lin[ToothGrowth$dose == 2] <- 1/2
ToothGrowth$d_quad <- NA
ToothGrowth$d_quad[ToothGrowth$dose == 0.5] <- -1/3
ToothGrowth$d_quad[ToothGrowth$dose == 1] <- 2/3
ToothGrowth$d_quad[ToothGrowth$dose == 2] <- -1/3
prep = rnorm(60,10,60)
parent = rep(c("no", "yes"), times = 30)
community = rep(c(1:6), each = 10)
d <- data.frame(prep = prep,
parent = parent,
community = community)
d$community_binary <- NA
d$community_binary[d$community == 1] <- "A"
d$community_binary[d$community == 2] <- "A"
d$community_binary[d$community == 3] <- "A"
d$community_binary[d$community == 4] <- "B"
d$community_binary[d$community == 5] <- "B"
d$community_binary[d$community == 6] <- "B"
d$community_binary <- NULL
# This line gives all 6 group means in one go
tapply(ToothGrowth$len, INDEX = list(ToothGrowth$supp, ToothGrowth$dose), mean, na.rm = T)
## 0.5 1 2
## OJ 13.23 22.70 26.06
## VC 7.98 16.77 26.14
# and the SDs
tapply(ToothGrowth$len, INDEX = list(ToothGrowth$supp, ToothGrowth$dose), sd, na.rm = T)
## 0.5 1 2
## OJ 4.459709 3.910953 2.655058
## VC 2.746634 2.515309 4.797731
# and the marginal means:
## for vitamin C type
tapply(ToothGrowth$len, INDEX = list(ToothGrowth$supp), mean, na.rm = T)
## OJ VC
## 20.66333 16.96333
## for dosage
tapply(ToothGrowth$len, INDEX = list(ToothGrowth$dose), mean, na.rm = T)
## 0.5 1 2
## 10.605 19.735 26.100
# full interaction model
m1 <- lm(len ~ C_supp + d_lin + d_quad + C_supp*d_lin + C_supp*d_quad, data = ToothGrowth)
# OR
m1 <- lm(len ~ C_supp + d_lin + d_quad + C_supp:d_lin + C_supp:d_quad, data = ToothGrowth)
# OR
m1 <- lm(len ~ C_supp * (d_lin + d_quad), data = ToothGrowth)
# output: all three of the above models are identical.
mcSummary(m1)
## Loading required package: car
## Loading required package: carData
## lm(formula = len ~ C_supp * (d_lin + d_quad), data = ToothGrowth)
##
## Omnibus ANOVA
## SS df MS EtaSq F p
## Model 2740.103 5 548.021 0.794 41.557 0
## Error 712.106 54 13.187
## Corr Total 3452.209 59 58.512
##
## RMSE AdjEtaSq
## 3.631 0.775
##
## Coefficients
## Est StErr t SSR(3) EtaSq tol CI_2.5 CI_97.5 p
## (Intercept) 18.813 0.469 40.130 21236.491 0.968 NA 17.873 19.753 0.000
## C_supp 3.700 0.938 3.946 205.350 0.224 1 1.820 5.580 0.000
## d_lin 15.495 1.148 13.493 2400.950 0.771 1 13.193 17.797 0.000
## d_quad 1.382 0.995 1.390 25.484 0.035 1 -0.611 3.376 0.170
## C_supp:d_lin -5.330 2.297 -2.321 71.022 0.091 1 -9.935 -0.725 0.024
## C_supp:d_quad 3.345 1.989 1.682 37.297 0.050 1 -0.643 7.333 0.098
From here, what lines are significant? Also, can we describe both main effects (of vitamin C type and of dosage)?
No. Let’s write up everything we can at this point:
Summary so far
We conducted an ANOVA of the 2 (vitamin C type: OJ or absorbic acid) x 3 (dosage: 0.5mg, 1mg, or 2mg) design using planned contrasts. There was a main effect of vitamin C type, such that orange juice led to more tooth growth than ascorbic acid (M = 20.66 vs. M = 16.96, b = 3.70, F(1,54) = 15.57, p < .001, PRE = .22). There was also a linear effect of dose, such that guinea pigs with higher doses of vitamin C had more tooth growth (M_0.5mg = 10.61 vs. M_2mg = 26.10, F(1,54) = 182.06, p < .001, PRE = .77), collapsing across vitamin C type. The quadratic effect of dosage was not significant (F(1,54) = 1.93, p = .170, PRE = .04).
There was also a significant interaction between vitamin C type and linear dosage, indicating that the linear effect of dosage differed based on type of vitamin C supplementation (F(1,54) = 5.39, p = .024, PRE = .09).
What are we missing? -Main effect of dosage -SIMPLE EFFECTS that make up our interaction.
Dummy codes are what we need to describe simple effects!
# for Vitamin C Supplementation
## OJ vs. AA
ToothGrowth$OJ_aa <- ifelse(ToothGrowth$supp == "OJ", 0, 1) # I always like to put the name of the other group in my dummy codes to remind me what group I'm comparing against... so this code would be FOR guinea pigs getting OJ, compared WITH guineas getting AA.
## Ascorbic acid vs OJ
ToothGrowth$AA_oj <- ifelse(ToothGrowth$supp == "OJ", 1, 0)
# for dosage:
## 0.5 mg dummies
ToothGrowth$half_one.d <- (0)*(ToothGrowth$dose == 0.5) + (1)*(ToothGrowth$dose == 1) + (0)*(ToothGrowth$dose == 2)
ToothGrowth$half_two.d <- (0)*(ToothGrowth$dose == 0.5) + (0)* (ToothGrowth$dose == 1) + (1)*(ToothGrowth$dose == 2)
## 1 mg dummies
ToothGrowth$one_half.d <- (1)*(ToothGrowth$dose == 0.5) + (0)*(ToothGrowth$dose == 1) + (0)*(ToothGrowth$dose == 2)
ToothGrowth$one_two.d <- (0)*(ToothGrowth$dose == 0.5) + (0)* (ToothGrowth$dose == 1) + (1)*(ToothGrowth$dose == 2)
## 2 mg dummies
ToothGrowth$two_half.d <- (1)*(ToothGrowth$dose == 0.5) + (0)*(ToothGrowth$dose == 1) + (0)*(ToothGrowth$dose == 2)
ToothGrowth$two_one.d <- (0)*(ToothGrowth$dose == 0.5) + (1)* (ToothGrowth$dose == 1) + (0)*(ToothGrowth$dose == 2)
We can ask and answer a whole lot of questions using a combination of contrast codes and dummy codes!
But let’s focus on the interaction that was significant above: C_supp*d_lin.
m2 <- lm(len ~ AA_oj + d_lin + d_quad + AA_oj:d_lin + AA_oj:d_quad, data = ToothGrowth)
mcSummary(m2)
## lm(formula = len ~ AA_oj + d_lin + d_quad + AA_oj:d_lin + AA_oj:d_quad,
## data = ToothGrowth)
##
## Omnibus ANOVA
## SS df MS EtaSq F p
## Model 2740.103 5 548.021 0.794 41.557 0
## Error 712.106 54 13.187
## Corr Total 3452.209 59 58.512
##
## RMSE AdjEtaSq
## 3.631 0.775
##
## Coefficients
## Est StErr t SSR(3) EtaSq tol CI_2.5 CI_97.5 p
## (Intercept) 16.963 0.663 25.586 8632.640 0.924 NA 15.634 18.293 0.000
## AA_oj 3.700 0.938 3.946 205.350 0.224 1.0 1.820 5.580 0.000
## d_lin 18.160 1.624 11.182 1648.928 0.698 0.5 14.904 21.416 0.000
## d_quad -0.290 1.406 -0.206 0.561 0.001 0.5 -3.110 2.530 0.837
## AA_oj:d_lin -5.330 2.297 -2.321 71.022 0.091 0.5 -9.935 -0.725 0.024
## AA_oj:d_quad 3.345 1.989 1.682 37.297 0.050 0.5 -0.643 7.333 0.098
Our model is now centered at the “ascorbic acid” level; so now, all parameters refer to effects that occur only among guinea pigs who received AA as their supplement type!
The answer to our question is found by looking at the “d_lin” line in m2: It looks like there is a significant simple effect of linear dosage for guinea pigs receiving ascorbic acid supplements, b = 18.16, F(1,54) = 125.04, PRE = .70, p < .001.
m3 <- lm(len ~ OJ_aa + d_lin + d_quad + OJ_aa:d_lin + OJ_aa:d_quad, data = ToothGrowth)
mcSummary(m3)
## lm(formula = len ~ OJ_aa + d_lin + d_quad + OJ_aa:d_lin + OJ_aa:d_quad,
## data = ToothGrowth)
##
## Omnibus ANOVA
## SS df MS EtaSq F p
## Model 2740.103 5 548.021 0.794 41.557 0
## Error 712.106 54 13.187
## Corr Total 3452.209 59 58.512
##
## RMSE AdjEtaSq
## 3.631 0.775
##
## Coefficients
## Est StErr t SSR(3) EtaSq tol CI_2.5 CI_97.5 p
## (Intercept) 20.663 0.663 31.166 12809.200 0.947 NA 19.334 21.993 0.000
## OJ_aa -3.700 0.938 -3.946 205.350 0.224 1.0 -5.580 -1.820 0.000
## d_lin 12.830 1.624 7.900 823.045 0.536 0.5 9.574 16.086 0.000
## d_quad 3.055 1.406 2.172 62.220 0.080 0.5 0.235 5.875 0.034
## OJ_aa:d_lin 5.330 2.297 2.321 71.022 0.091 0.5 0.725 9.935 0.024
## OJ_aa:d_quad -3.345 1.989 -1.682 37.297 0.050 0.5 -7.333 0.643 0.098
Now all parameters refer to effects that occur only among guinea pigs who received OJ as their supplement type, because we used the dummy code associated with OJ, rather than AA! Looking at the “d_lin” line in m3, we see there is again a significant simple effect of linear dosage for guinea pigs receiving OJ, b = 12.83, F(1,54) = 62.41, PRE = .54, p < .001.
So now we can add a bit more information to our write up! We can officially decompose the interaction of interest.
Summary including simple effects for linear dose
A significant interaction (F(1,54) = 5.39, p = .024, PRE = .09) indicated that the linear effect of dosage differed based on type of vitamin C supplementation, such the linear effect of dosage was larger for guinea pigs getting ascorbic acid (the simple effect is \(M_0.5mg\) = 7.98 vs. \(M_2mg\) = 26.14, F(1,54) = 125.04, PRE = .70, p < .001) than for guinea pigs getting OJ (simple effect is \(M_0\)\(_.5\)\(_m\)\(g\) = 13.23 vs. \(M_2mg\) = 26.06, F(1,54) = 62.41, PRE = .54, p < .001).
But wait… what’s left?
This one we need to build a new model C, and conduct a 2df test.
# Model A is just m1, our augmented model.
m1 <- lm(len ~ C_supp * (d_lin + d_quad), data = ToothGrowth)
# Model C is a simple model that DOES NOT INCLUDE the linear and quadratic effect of dosage. HOWEVER, you still need to include their higher order interactions! Here is the code to do that.
m4_c <- lm(len ~ C_supp + C_supp:d_lin + C_supp:d_quad, data = ToothGrowth)
mcSummary(m4_c) # notice that this summary has C supplementation and its interactions with d_lin and d_quad--but those two are dropped from the model!
## lm(formula = len ~ C_supp + C_supp:d_lin + C_supp:d_quad, data = ToothGrowth)
##
## Omnibus ANOVA
## SS df MS EtaSq F p
## Model 313.669 3 104.556 0.091 1.866 0.146
## Error 3138.540 56 56.045
## Corr Total 3452.209 59 58.512
##
## RMSE AdjEtaSq
## 7.486 0.042
##
## Coefficients
## Est StErr t SSR(3) EtaSq tol CI_2.5 CI_97.5 p
## (Intercept) 18.813 0.966 19.466 21236.491 0.871 NA 16.877 20.749 0.000
## C_supp 3.700 1.933 1.914 205.350 0.061 1 -0.172 7.572 0.061
## C_supp:d_lin -5.330 4.735 -1.126 71.022 0.022 1 -14.815 4.155 0.265
## C_supp:d_quad 3.345 4.100 0.816 37.297 0.012 1 -4.869 11.559 0.418
# And the answer.
modelCompare(m4_c, m1)
## SSE (Compact) = 3138.54
## SSE (Augmented) = 712.106
## Delta R-Squared = 0.7028642
## Partial Eta-Squared (PRE) = 0.7731092
## F(2,54) = 91.99996, p = 4.046291e-18
There is a main effect of dosage (F(2,54) = 92.00, p < .001, PRE = .77)
We conducted an ANOVA of the 2 (vitamin C type: OJ or absorbic acid) x 3 (dosage: 0.5mg, 1mg, or 2mg) design using planned contrasts. There was a main effect of vitamin C type, such that orange juice led to more tooth growth than ascorbic acid (M = 20.66 vs. M = 16.96, b = 3.70, F(1,54) = 15.57, p < .001, PRE = .22). There was also a main effect of dosage (F(2,54) = 92.00, p < .001, PRE = .77). This effect was driven by the linear effect of dose, such that guinea pigs with higher doses of vitamin C had more tooth growth (M_0.5mg = 10.61 vs. M_2mg = 26.10, F(1,54) = 182.06, p < .001, PRE = .77), collapsing across vitamin C type. The quadratic effect of dosage was not significant (F(1,54) = 1.93, p = .170, PRE = .04).
A significant interaction (F(1,54) = 5.39, p = .024, PRE = .09) indicated that the linear effect of dosage differed based on type of vitamin C supplementation, such the linear effect of dosage was larger for guinea pigs getting ascorbic acid (the simple effect is M_0.5mg = 7.98 vs. M_2mg = 26.14, F(1,54) = 125.04, PRE = .70, p < .001) than for guinea pigs getting OJ (simple effect is M_0.5mg = 13.23 vs. M_2mg = 26.06, F(1,54) = 62.41, PRE = .54, p < .001).
hmat <- tapply(ToothGrowth$len, INDEX = list(ToothGrowth$supp, ToothGrowth$dose), FUN = mean, na.rm = T)
barplot(hmat, beside = T,
legend.text = T, args.legend = c(list(x='topleft'), title = "C Supplement"),
main = "Tooth Length by Condition",
xlab = "Dose (mg/day)",
ylab = "Odontoblast Length (mm)")
toothplot <- ggplot(ToothGrowth,
aes(x = factor(dose),
y = len,
fill = supp)) +
geom_bar(stat = 'summary', fun = 'mean',
position = position_dodge()) +
stat_summary(fun.data = mean_se, #this uses standard errors of the mean, other arguments are possible!
geom = "errorbar",
position = position_dodge(.9),
width=.3,
aes(y = len, x = factor(dose), fill = supp))
## Warning: Ignoring unknown aesthetics: fill
toothplot +
theme_classic() +
scale_fill_manual("C Supplement", values = c("orange3","navyblue")) +
xlab("Dose (mg/day)") +
ylab("Tooth Length by Condition")