library(datasets)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the ToothGrowth data set and treat dose as a categorical variable
# so that it can be used for grouping in the plot below.
data("ToothGrowth")
ToothGrowth[["dose"]] <- factor(ToothGrowth[["dose"]])

# Quick look at the first rows and the per-column summaries.
head(ToothGrowth)
summary(ToothGrowth)
## len supp dose
## Min. : 4.20 OJ:30 0.5:20
## 1st Qu.:13.07 VC:30 1 :20
## Median :19.25 2 :20
## Mean :18.81
## 3rd Qu.:25.27
## Max. :33.90
# Box plots of tooth length per dose, one panel per supplement type.
ggplot(ToothGrowth, aes(x = dose, y = len, fill = dose)) +
  geom_boxplot() +
  facet_grid(. ~ supp) +
  labs(
    x = "Dose (mg)",
    y = "Length",
    title = "The Effect of Supplement Type on Tooth Growth"
  )
Null hypothesis: There is no difference in mean tooth length between the two supplement types at any dose level.
# Two-sample t-tests (t.test defaults: unequal variances, 95% interval) of
# tooth length by supplement type, run separately within each dose level.
t0.5 <- t.test(len ~ supp, data = subset(ToothGrowth, dose == 0.5))
t1 <- t.test(len ~ supp, data = subset(ToothGrowth, dose == 1))
t2 <- t.test(len ~ supp, data = subset(ToothGrowth, dose == 2))

# Collect the p-value and confidence bounds of each test, one row per dose.
supp_tests <- list(t0.5, t1, t2)
data.frame(
  "p-value" = vapply(supp_tests, function(tt) tt$p.value, numeric(1)),
  "CI Low" = vapply(supp_tests, function(tt) tt$conf.int[1], numeric(1)),
  "CI High" = vapply(supp_tests, function(tt) tt$conf.int[2], numeric(1)),
  row.names = c("0.5 mg", "1 mg", "2 mg")
)
From this test, we see very small p-values at the lower doses (0.5 mg and 1 mg). This indicates that there is a difference in mean length between supplement types, so we can reject the null hypothesis at both of these doses. At the highest dose (2 mg), we see a very high p-value, indicating that we cannot reject the null hypothesis.
Null hypothesis: There is no difference between 0.5 mg and 1 mg doses for each supplement type.
# Keep only the two lowest dose levels, then compare 0.5 mg against 1 mg
# separately for each supplement type.
x <- subset(ToothGrowth, dose %in% c(0.5, 1))
A <- t.test(len ~ dose, data = subset(x, supp == "OJ"))
B <- t.test(len ~ dose, data = subset(x, supp == "VC"))
Null hypothesis: There is no difference between 1 mg and 2 mg doses for each supplement type.
# Keep only the two highest dose levels, then compare 1 mg against 2 mg
# separately for each supplement type.
y <- ToothGrowth[(ToothGrowth$dose == 1) | (ToothGrowth$dose == 2), ]
# Both tests must read from `y`; using the 0.5/1 mg subset here would just
# repeat the previous comparison instead of testing 1 mg vs 2 mg.
C <- t.test(len ~ dose, data = y[y$supp == "OJ", ])
D <- t.test(len ~ dose, data = y[y$supp == "VC", ])
Summary table for doses:
# Summarise the four dose comparisons: p-value and confidence bounds,
# one row per (dose pair, supplement type) test.
dose_tests <- list(A, B, C, D)
data.frame(
  "p-value" = vapply(dose_tests, function(tt) tt$p.value, numeric(1)),
  "CI Low" = vapply(dose_tests, function(tt) tt$conf.int[1], numeric(1)),
  "CI High" = vapply(dose_tests, function(tt) tt$conf.int[2], numeric(1)),
  row.names = c(
    "0.5 mg - 1 mg (OJ)", "0.5 mg - 1 mg (VC)",
    "1 mg - 2 mg (OJ)", "1 mg - 2 mg (VC)"
  )
)
Here, we compared adjacent dose levels within each supplement type. All tests yield p-values below 0.05, indicating a statistically significant difference in mean tooth length between the compared doses for both supplement types. In all cases, we can reject the null hypothesis.
We can state at the 95% confidence level that there is likely a difference between supplement types at the 0.5 mg and 1 mg dose levels, but not at the 2 mg dose level. We can also state with the same confidence that there is a difference between dose levels for both supplement types.