Overview

This document analyzes the ToothGrowth data in the R datasets package.

Exploratory Analysis

library(ggplot2)

# Load the ToothGrowth data set into the workspace.
data("ToothGrowth")

# Show the structure of the data frame: number of observations, column names,
# column types and the first few values of each column.
str(ToothGrowth)
## 'data.frame':    60 obs. of  3 variables:
##  $ len : num  4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
##  $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
##  $ dose: num  0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
# Print the first rows of the data frame.
head(ToothGrowth)
##    len supp dose
## 1  4.2   VC  0.5
## 2 11.5   VC  0.5
## 3  7.3   VC  0.5
## 4  5.8   VC  0.5
## 5  6.4   VC  0.5
## 6 10.0   VC  0.5
# Distribution of tooth growth for the Vitamin C ("VC") supplement.
# hist() is drawn with an empty title and x label; the title is added afterwards.
with(ToothGrowth, hist(len[supp == "VC"], main = "", xlab = ""))
title(main = "Histogram of Tooth growth with Vitamin C (VC) supplement")

# Distribution of tooth growth for the Orange Juice ("OJ") supplement.
with(ToothGrowth, hist(len[supp == "OJ"], main = "", xlab = ""))
title(main = "Histogram of Tooth growth with Orange Juice (OJ) supplement")

# Tooth growth against dose: one panel per supplement, observations drawn as
# points joined by a line, coloured by supplement.
ggplot(data = ToothGrowth, mapping = aes(x = dose, y = len, color = supp)) +
  geom_line() +
  geom_point() +
  facet_wrap(~supp) +
  labs(
    y = "Tooth growth",
    title = "Tooth growth vs dose by supplement (Orange Juice (OJ) & Vitamin C (VC))"
  ) +
  theme(plot.title = element_text(size = 12, face = "bold"))

# Close the current graphics device.
dev.off()
## null device 
##           1

Summary of the data

# Box plots of tooth growth, one panel per dose/supplement combination
# (3 rows x 2 columns), coloured by supplement.
ggplot(data = ToothGrowth, mapping = aes(x = dose, y = len, color = supp)) +
  geom_boxplot() +
  facet_wrap(dose ~ supp, nrow = 3, ncol = 2) +
  labs(
    y = "Tooth growth",
    title = "Summary of Tooth growth by supplement (Orange Juice (OJ) & Vitamin C (VC)) and dose (0.5, 1 and 2)"
  ) +
  theme(plot.title = element_text(size = 10, face = "bold"))

# Close the current graphics device.
dev.off()
## null device 
##           1

Confidence intervals of tooth growth by supplement and dose

# 95% confidence intervals for the MEAN tooth length of every supplement/dose
# group.
#
# The 2.5% / 97.5% sample quantiles of the raw observations describe the spread
# of the ten measurements in a group, not the uncertainty of an estimate, so
# they are not a confidence interval. The bounds below come from a one-sample
# t interval around the group mean instead.
# NOTE: the centre column of CI is therefore `Mean` (it used to be `Median`).

# Mean tooth length and its t-based confidence interval for one group.
#   supp  - supplement code, "VC" or "OJ"
#   dose  - dose level (0.5, 1 or 2)
#   level - confidence level of the interval (default 0.95)
# Returns a named numeric vector c(L, Mean, U): lower bound, mean, upper bound.
mean_ci <- function(supp, dose, level = 0.95) {
  len <- ToothGrowth$len[ToothGrowth$supp == supp & ToothGrowth$dose == dose]
  bounds <- t.test(len, conf.level = level)$conf.int
  c(L = bounds[1], Mean = mean(len), U = bounds[2])
}

#95% confidence interval of mean length with VC supplement and dose 0.5, 1 and 2
VC_d0.5 <- mean_ci("VC", 0.5)
VC_d1 <- mean_ci("VC", 1)
VC_d2 <- mean_ci("VC", 2)

#95% confidence interval of mean length with OJ supplement and dose 0.5, 1 and 2
OJ_d0.5 <- mean_ci("OJ", 0.5)
OJ_d1 <- mean_ci("OJ", 1)
OJ_d2 <- mean_ci("OJ", 2)

# One row per group; the columns L, Mean and U come from the vector names.
CI <- data.frame(rbind(VC_d0.5, VC_d1, VC_d2, OJ_d0.5, OJ_d1, OJ_d2))
CI$supp <- c("VC", "VC", "VC", "OJ", "OJ", "OJ")
CI$dose <- c("0.5", "1", "2", "0.5", "1", "2")
# x position of each interval, derived from the row count instead of a
# hard-coded vector inside aes().
CI$idx <- seq_len(nrow(CI))

#plot confidence intervals: point at the mean, error bar from L to U
ggplot(CI, aes(x = idx, y = Mean, color = supp)) +
  geom_point() +
  geom_errorbar(aes(ymax = U, ymin = L)) +
  facet_wrap(dose ~ supp, nrow = 3, ncol = 2) +
  labs(
    x = "",
    y = "Tooth growth",
    title = "95% confidence interval of Tooth growth by supplement (OJ vs VC) and dose 0.5, 1 and 2"
  ) +
  theme(plot.title = element_text(size = 12, face = "bold"))

# Close the current graphics device.
dev.off()
## null device 
##           1

Hypothesis testing

# Is there a difference in tooth growth between supplements at dose 0.5?
# Tooth lengths of the two supplement groups at this dose.
VC1 <- with(ToothGrowth, len[supp == "VC" & dose == 0.5])
OJ1 <- with(ToothGrowth, len[supp == "OJ" & dose == 0.5])

# Two-sided, unpaired two-sample t-test.
# Equal variance across subjects is assumed (var.equal = TRUE).
t1 <- t.test(VC1, OJ1, alternative = "two.sided", paired = FALSE,
             var.equal = TRUE)
t1
## 
##  Two Sample t-test
## 
## data:  VC1 and OJ1
## t = -3.1697, df = 18, p-value = 0.005304
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -8.729738 -1.770262
## sample estimates:
## mean of x mean of y 
##      7.98     13.23
# Tooth lengths of the two supplement groups at dose 1.
VC2 <- with(ToothGrowth, len[supp == "VC" & dose == 1])
OJ2 <- with(ToothGrowth, len[supp == "OJ" & dose == 1])

# Two-sided, unpaired two-sample t-test.
# Equal variance across subjects is assumed (var.equal = TRUE).
t2 <- t.test(VC2, OJ2, alternative = "two.sided", paired = FALSE,
             var.equal = TRUE)
t2
## 
##  Two Sample t-test
## 
## data:  VC2 and OJ2
## t = -4.0328, df = 18, p-value = 0.0007807
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -9.019308 -2.840692
## sample estimates:
## mean of x mean of y 
##     16.77     22.70
# Tooth lengths of the two supplement groups at dose 2.
VC3 <- with(ToothGrowth, len[supp == "VC" & dose == 2])
OJ3 <- with(ToothGrowth, len[supp == "OJ" & dose == 2])

# Two-sided, unpaired two-sample t-test.
# Equal variance across subjects is assumed (var.equal = TRUE).
t3 <- t.test(VC3, OJ3, alternative = "two.sided", paired = FALSE,
             var.equal = TRUE)
t3
## 
##  Two Sample t-test
## 
## data:  VC3 and OJ3
## t = 0.046136, df = 18, p-value = 0.9637
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.562999  3.722999
## sample estimates:
## mean of x mean of y 
##     26.14     26.06

Conclusions

Assuming equal variance across subjects we can conclude the following from the hypothesis tests:

  1. For dose equal to 0.5: we reject the null hypothesis that the true difference in mean tooth growth between supplements is equal to 0 (p-value = 0.0053037)

  2. For dose equal to 1: we reject the null hypothesis that the true difference in mean tooth growth between supplements is equal to 0 (p-value = $7.8 \times 10^{-4}$)

  3. For dose equal to 2: we fail to reject the null hypothesis that the true difference in mean tooth growth between supplements is equal to 0 (p-value = 0.9637098)