This document analyzes the ToothGrowth data in the R datasets package.
library(ggplot2)
# Load the ToothGrowth data set into the workspace and print a compact
# summary of its structure (column names, types and first values).
data("ToothGrowth")
str(ToothGrowth)
## 'data.frame': 60 obs. of 3 variables:
## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
## $ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
# Print the first rows of the data set.
head(ToothGrowth)
## len supp dose
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5
# Distribution of tooth length for the Vitamin C ("VC") supplement.
# The histogram is drawn without a title or x label; the title is added
# afterwards with title().
hist(with(ToothGrowth, len[supp == "VC"]), xlab = "", main = "")
title(main = "Histogram of Tooth growth with Vitamin C (VC) supplement")

# Distribution of tooth length for the Orange Juice ("OJ") supplement,
# drawn the same way.
hist(with(ToothGrowth, len[supp == "OJ"]), xlab = "", main = "")
title(main = "Histogram of Tooth growth with Orange Juice (OJ) supplement")
# Tooth length against dose, one panel per supplement, with observations
# shown as points and joined by lines.
ggplot(data = ToothGrowth, mapping = aes(x = dose, y = len, colour = supp)) +
  geom_line() +
  geom_point() +
  facet_wrap(~ supp) +
  theme(plot.title = element_text(size = 12, face = "bold")) +
  labs(
    y = "Tooth growth",
    title = "Tooth growth vs dose by supplement (Orange Juice (OJ) & Vitamin C (VC))"
  )
# Close the active graphics device.
dev.off()
## null device
## 1
# Box plots of tooth length, one panel per dose/supplement combination
# laid out on a 3 x 2 grid.
ggplot(data = ToothGrowth, mapping = aes(x = dose, y = len, colour = supp)) +
  geom_boxplot() +
  facet_wrap(dose ~ supp, ncol = 2, nrow = 3) +
  theme(plot.title = element_text(size = 10, face = "bold")) +
  labs(
    y = "Tooth growth",
    title = "Summary of Tooth growth by supplement (Orange Juice (OJ) & Vitamin C (VC)) and dose (0.5, 1 and 2)"
  )
# Close the active graphics device.
dev.off()
## null device
## 1
# 2.5%, 50% and 97.5% sample quantiles of tooth length for each
# supplement/dose combination, collected into the data frame `CI`
# (columns L, Median, U, supp, dose; one row per combination).
# NOTE(review): these are empirical percentiles of the observations, which the
# rest of the script labels as a "95% confidence interval" -- confirm that this
# is the intended quantity.

# Quantiles of `len` for one supplement at one dose level.
len_quantiles <- function(supplement, dose_level) {
  keep <- ToothGrowth$supp == supplement & ToothGrowth$dose == dose_level
  quantile(ToothGrowth$len[keep], probs = c(0.025, 0.50, 0.975))
}

# Vitamin C at dose 0.5, 1 and 2
VC_d0.5 <- len_quantiles("VC", 0.5)
VC_d1 <- len_quantiles("VC", 1)
VC_d2 <- len_quantiles("VC", 2)

# Orange juice at dose 0.5, 1 and 2
OJ_d0.5 <- len_quantiles("OJ", 0.5)
OJ_d1 <- len_quantiles("OJ", 1)
OJ_d2 <- len_quantiles("OJ", 2)

# Stack the six quantile vectors; the object names become the row names.
quantile_rows <- rbind(VC_d0.5, VC_d1, VC_d2, OJ_d0.5, OJ_d1, OJ_d2)

CI <- data.frame(
  L = quantile_rows[, 1],
  Median = quantile_rows[, 2],
  U = quantile_rows[, 3],
  supp = rep(c("VC", "OJ"), each = 3),
  dose = rep(c("0.5", "1", "2"), times = 2),
  stringsAsFactors = FALSE
)
# Plot the median of each supplement/dose group as a point with an error bar
# running from the L to the U column of `CI`, one panel per combination.
# The six rows are placed at x positions 1 to 6.
ggplot(
  data = CI,
  mapping = aes(x = c(1, 2, 3, 4, 5, 6), y = Median, colour = supp)
) +
  geom_point() +
  geom_errorbar(mapping = aes(ymin = L, ymax = U)) +
  facet_wrap(dose ~ supp, ncol = 2, nrow = 3) +
  theme(plot.title = element_text(size = 12, face = "bold")) +
  labs(
    x = "",
    y = "Tooth growth",
    title = "95% confidence interval of Tooth growth by supplement (OJ vs VC) and dose 0.5, 1 and 2"
  )
# Close the active graphics device.
dev.off()
## null device
## 1
# Is there a difference in tooth growth between supplements at dose 0.5?
VC1 <- with(ToothGrowth, len[supp == "VC" & dose == 0.5])
OJ1 <- with(ToothGrowth, len[supp == "OJ" & dose == 0.5])

# Unpaired, two-sided two-sample t-test with a pooled variance estimate
# (equal variance across subjects is assumed).
t1 <- t.test(
  x = VC1,
  y = OJ1,
  var.equal = TRUE,
  paired = FALSE,
  alternative = "two.sided"
)
t1
##
## Two Sample t-test
##
## data: VC1 and OJ1
## t = -3.1697, df = 18, p-value = 0.005304
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -8.729738 -1.770262
## sample estimates:
## mean of x mean of y
## 7.98 13.23
# Tooth length for each supplement at dose 1.
VC2 <- with(ToothGrowth, len[supp == "VC" & dose == 1])
OJ2 <- with(ToothGrowth, len[supp == "OJ" & dose == 1])

# Unpaired, two-sided two-sample t-test with a pooled variance estimate
# (equal variance across subjects is assumed).
t2 <- t.test(
  x = VC2,
  y = OJ2,
  var.equal = TRUE,
  paired = FALSE,
  alternative = "two.sided"
)
t2
##
## Two Sample t-test
##
## data: VC2 and OJ2
## t = -4.0328, df = 18, p-value = 0.0007807
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -9.019308 -2.840692
## sample estimates:
## mean of x mean of y
## 16.77 22.70
# Tooth length for each supplement at dose 2.
VC3 <- with(ToothGrowth, len[supp == "VC" & dose == 2])
OJ3 <- with(ToothGrowth, len[supp == "OJ" & dose == 2])

# Unpaired, two-sided two-sample t-test with a pooled variance estimate
# (equal variance across subjects is assumed).
t3 <- t.test(
  x = VC3,
  y = OJ3,
  var.equal = TRUE,
  paired = FALSE,
  alternative = "two.sided"
)
t3
##
## Two Sample t-test
##
## data: VC3 and OJ3
## t = 0.046136, df = 18, p-value = 0.9637
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.562999 3.722999
## sample estimates:
## mean of x mean of y
## 26.14 26.06
Assuming equal variance across subjects we can conclude the following from the hypothesis tests:
For dose equal to 0.5: we reject the null hypothesis that the true difference in mean tooth growth between supplements is equal to 0 (p-value = 0.0053037).
For dose equal to 1: we reject the null hypothesis that the true difference in mean tooth growth between supplements is equal to 0 (p-value = $7.8 \times 10^{-4}$).
For dose equal to 2: we fail to reject the null hypothesis that the true difference in mean tooth growth between supplements is equal to 0 (p-value = 0.9637098).