Preparatory steps:
# Work on a copy of the built-in ToothGrowth data so the original is untouched.
tdata <- ToothGrowth
# Convert the dose column to a factor so it can be used as a grouping variable.
tdata[["dose"]] <- as.factor(tdata[["dose"]])
Let’s look at the data and at how tooth length differs depending on the source of vitamin C and on the dose.
library(ggplot2)

# Legend settings shared by the colour and fill scales so both stay in sync.
dose_breaks <- c("0.5", "1", "2")
dose_labels <- c("0.5 mg", "1 mg", "2 mg")
dose_palette <- c("#F0E442", "#0072B2", "#D55E00")

# Box plots of tooth length per dose, with the raw points overlaid and one
# panel per supplement type.
g <- ggplot(data = tdata, mapping = aes(x = dose, y = len)) +
  facet_wrap(~ supp, nrow = 1, ncol = 2) +
  geom_boxplot(mapping = aes(fill = dose)) +
  geom_point(mapping = aes(colour = dose)) +
  scale_colour_manual(
    breaks = dose_breaks,
    labels = dose_labels,
    values = dose_palette
  ) +
  scale_fill_manual(
    breaks = dose_breaks,
    labels = dose_labels,
    values = dose_palette
  ) +
  xlab("Dose") +
  ylab("Tooth length")

# Print the plot.
g
We can see that there is definitely a connection between the dose and the tooth length. Higher doses seem to result in longer teeth. And this is true for both types of supplements - orange juice and vitamin C.
We will do the summary based on the differences we noticed in the previous section.
library(knitr)

# Tooth lengths for each supplement type.
vc <- tdata[tdata$supp == "VC", ]$len
oj <- tdata[tdata$supp == "OJ", ]$len

# Statistics reported for each group, in table column order.
# The objects are named `stat_names` and `supp_summary` (not `colnames` and
# `summary`) so that the base functions of those names are not masked.
stat_names <- c("Mean", "SD", "Variance", "Min", "Max")
vc_summary <- c(mean(vc), sd(vc), var(vc), min(vc), max(vc))
oj_summary <- c(mean(oj), sd(oj), var(oj), min(oj), max(oj))

# One row per supplement type, one column per statistic.
supp_summary <- rbind(vc_summary, oj_summary)
dimnames(supp_summary) <- list(c("Vitamin C", "Orange Juice"), stat_names)

kable(
  supp_summary,
  digits = 2,
  caption = "Tooth length and supplement type summary",
  row.names = TRUE
)
| | Mean | SD | Variance | Min | Max |
|---|---|---|---|---|---|
| Vitamin C | 16.96 | 8.27 | 68.33 | 4.2 | 33.9 |
| Orange Juice | 20.66 | 6.61 | 43.63 | 8.2 | 30.9 |
The next table is the summary for tooth length broken down into 3 pieces: one for each dose:
# Statistics reported for each dose, in table column order.
# Named `stat_names` (not `colnames`) so that base::colnames() is not masked.
stat_names <- c("Mean", "SD", "Variance", "Min", "Max")

# Tooth lengths and their descriptive statistics for each dose level.
dose0.5 <- tdata[tdata$dose == "0.5", ]$len
values0.5 <- c(mean(dose0.5), sd(dose0.5), var(dose0.5), min(dose0.5), max(dose0.5))

dose1 <- tdata[tdata$dose == "1", ]$len
values1 <- c(mean(dose1), sd(dose1), var(dose1), min(dose1), max(dose1))

dose2 <- tdata[tdata$dose == "2", ]$len
values2 <- c(mean(dose2), sd(dose2), var(dose2), min(dose2), max(dose2))

# One row per dose, one column per statistic.
values <- rbind(values0.5, values1, values2)
dimnames(values) <- list(c("0.5 mg", "1 mg", "2 mg"), stat_names)

kable(
  values,
  digits = 2,
  caption = "Tooth length and Vit. C doses summary",
  row.names = TRUE
)
| | Mean | SD | Variance | Min | Max |
|---|---|---|---|---|---|
| 0.5 mg | 10.61 | 4.50 | 20.25 | 4.2 | 21.5 |
| 1 mg | 19.73 | 4.42 | 19.50 | 13.6 | 27.3 |
| 2 mg | 26.10 | 3.77 | 14.24 | 18.5 | 33.9 |
For this task we will use the t-tests. First let’s check the group difference for different supplement types.
# Two-sample t-test of tooth length between the two supplement types.
# The test is fitted once and its components are read by their full names;
# `$conf` only worked through partial matching of `conf.int`.
supp_test <- t.test(len ~ supp, data = tdata)
supp_test$conf.int
## [1] -0.1710156 7.5710156
## attr(,"conf.level")
## [1] 0.95
supp_test$p.value
## [1] 0.06063451
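The p-value (about 0.06) is above the 0.05 significance level, and the 95% confidence interval contains zero, so we cannot reject the null hypothesis: we cannot safely claim that there is a relationship between the supplement type and tooth length.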
To run the t.test for the dose variable we need to create a grouping factor with two levels, so let’s make all possible combinations of doses: (0.5,1), (0.5,2) and (1,2):
# Split the data into one data frame per dose level.
rows_by_dose <- split(tdata, tdata$dose)
dose0.5 <- rows_by_dose[["0.5"]]
dose1 <- rows_by_dose[["1"]]
dose2 <- rows_by_dose[["2"]]

# Stack every pair of dose levels so each combined frame has exactly two
# groups in its `dose` column.
dose0.5to1 <- rbind(dose0.5, dose1)
dose0.5to2 <- rbind(dose0.5, dose2)
dose1to2 <- rbind(dose1, dose2)
Now let’s look at group differences associated with different doses.
# Two-sample t-tests of tooth length for every pair of dose levels.
# Each test is fitted once and its components are read by their full names;
# `$conf` only worked through partial matching of `conf.int`.

# 0.5 mg vs 1 mg
test_05_vs_1 <- t.test(len ~ dose, data = dose0.5to1)
test_05_vs_1$conf.int
## [1] -11.983781 -6.276219
## attr(,"conf.level")
## [1] 0.95
test_05_vs_1$p.value
## [1] 1.268301e-07

# 0.5 mg vs 2 mg
test_05_vs_2 <- t.test(len ~ dose, data = dose0.5to2)
test_05_vs_2$conf.int
## [1] -18.15617 -12.83383
## attr(,"conf.level")
## [1] 0.95
test_05_vs_2$p.value
## [1] 4.397525e-14

# 1 mg vs 2 mg
test_1_vs_2 <- t.test(len ~ dose, data = dose1to2)
test_1_vs_2$conf.int
## [1] -8.996481 -3.733519
## attr(,"conf.level")
## [1] 0.95
test_1_vs_2$p.value
## [1] 1.90643e-05
As we can see, the p-values for all three comparisons are far below 0.05 and none of the 95% confidence intervals contains zero, so in each case we reject the null hypothesis of equal means and conclude that higher doses of vitamin C are associated with longer teeth.
After doing the analysis we made 2 conclusions:
* We found no statistically significant evidence that the tooth length depends on the supplement type (we failed to reject the null hypothesis at the 5% level)
* The tooth length does depend on the dose of the vitamin C: the higher the dose, the longer the tooth.
Assumptions:
* We assumed that this sample of 60 guinea pigs is representative of the population
* We assumed that the variance is different in the t-test groups.