Overview

Show a summary of the ToothGrowth data set, as well as boxplots of tooth length (len) by supplement type (supp) and by dose.

# Attach the built-in data sets (source of ToothGrowth) and the plotting package.
library(datasets)
library(ggplot2)

# Per-column summary of the three variables: len, supp and dose.
summary(ToothGrowth)
##       len        supp         dose      
##  Min.   : 4.20   OJ:30   Min.   :0.500  
##  1st Qu.:13.07   VC:30   1st Qu.:0.500  
##  Median :19.25           Median :1.000  
##  Mean   :18.81           Mean   :1.167  
##  3rd Qu.:25.27           3rd Qu.:2.000  
##  Max.   :33.90           Max.   :2.000
# Treat dose as a categorical variable so that each dose level gets its own
# box (and its own fill colour) in the plots below.
ToothGrowth$dose <- as.factor(ToothGrowth$dose)

# Tooth length by supplement type.
g_supp <- ggplot(data = ToothGrowth, aes(x = supp, y = len)) +
  geom_boxplot(aes(fill = supp))

# Tooth length by dose level.
g_dose <- ggplot(data = ToothGrowth, aes(x = dose, y = len)) +
  geom_boxplot(aes(fill = dose))

# Render both plots, supplement first.
g_supp
g_dose

# Bar chart of tooth length against dose, one facet per supplement type.
# NOTE(review): with stat = "identity" every observation is drawn and stacked,
# so each bar's height is the sum of len within that dose/supp cell rather
# than its mean -- confirm that this is the intended display.
ggplot(data = ToothGrowth, aes(x = dose, y = len, fill = supp)) +
  geom_bar(stat = "identity") +
  facet_grid(. ~ supp) +
  xlab("Dose in milligrams") +
  ylab("Tooth length") +
  guides(fill = guide_legend(title = "Supplement type"))

Confidence Interval

# Hand-computed 95% Welch confidence interval for the difference in mean tooth
# length between the two supplement groups: mean(len | OJ) - mean(len | VC).
sample_size <- nrow(ToothGrowth)
supp_OJ <- subset(ToothGrowth, supp == "OJ")
supp_VC <- subset(ToothGrowth, supp == "VC")

# Take each group's size from the data instead of assuming that both groups
# hold exactly half of the rows; the formulas below then stay correct for
# unbalanced groups as well.
nx <- nrow(supp_OJ)
ny <- nrow(supp_VC)
n <- sample_size / 2 # retained for backward compatibility; not used below

meanx <- mean(supp_OJ$len)
meany <- mean(supp_VC$len)
varx <- var(supp_OJ$len)
vary <- var(supp_VC$len)

# Standard error of the difference in means (unequal variances).
# NOTE: `sd` and `df` share their names with stats::sd() and stats::df();
# the names are kept because the 90% interval further down reads them.
sd <- sqrt(varx / nx + vary / ny)

# Welch-Satterthwaite approximation of the degrees of freedom.
df <- sd^4 / ((varx / nx)^2 / (nx - 1) + (vary / ny)^2 / (ny - 1))

# Two-sided 95% interval: 2.5% in each tail of the t distribution.
CI <- meanx - meany + c(-1, 1) * qt(0.975, df) * sd
print(CI)
## [1] -0.1710156  7.5710156

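The 95% confidence interval for the difference in mean tooth length between the two supplement types (OJ minus VC) is (-0.1710156, 7.5710156). Because this interval contains 0, we cannot conclude at the 5% significance level that the supplement type has an effect on tooth length.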

# Two-sided 90% interval for the same difference in means: qt(0.95, df) leaves
# 5% in each tail. This overwrites the 95% interval previously stored in `CI`.
CI <- meanx - meany + c(-1, 1) * qt(0.95, df) * sd
# Display the interval so that the figures quoted in the text can be checked.
print(CI)
## [1] 0.4682687 6.9317313

However, the 90% confidence interval for the same difference is (0.4682687, 6.9317313), which lies entirely above 0. The supplement type therefore has a significant effect on tooth length at the 10% significance level.

In addition, t.test leads to the same conclusion:

# Welch two-sample t-test of tooth length by supplement type; its 95% interval
# should reproduce the hand-computed one.
t.test(
  len ~ supp,
  data = ToothGrowth
)
## 
##  Welch Two Sample t-test
## 
## data:  len by supp
## t = 1.9153, df = 55.309, p-value = 0.06063
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1710156  7.5710156
## sample estimates:
## mean in group OJ mean in group VC 
##         20.66333         16.96333
# Welch two-sample t-test of tooth length: dose 0.5 versus dose 1.
t.test(
  len ~ dose,
  data = subset(ToothGrowth, dose %in% c("0.5", "1"))
)
## 
##  Welch Two Sample t-test
## 
## data:  len by dose
## t = -6.4766, df = 37.986, p-value = 1.268e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -11.983781  -6.276219
## sample estimates:
## mean in group 0.5   mean in group 1 
##            10.605            19.735
# Welch two-sample t-test of tooth length: dose 1 versus dose 2.
t.test(
  len ~ dose,
  data = subset(ToothGrowth, dose %in% c("1", "2"))
)
## 
##  Welch Two Sample t-test
## 
## data:  len by dose
## t = -4.9005, df = 37.101, p-value = 1.906e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -8.996481 -3.733519
## sample estimates:
## mean in group 1 mean in group 2 
##          19.735          26.100
# Welch two-sample t-test of tooth length: dose 0.5 versus dose 2.
t.test(
  len ~ dose,
  data = subset(ToothGrowth, dose %in% c("0.5", "2"))
)
## 
##  Welch Two Sample t-test
## 
## data:  len by dose
## t = -11.799, df = 36.883, p-value = 4.398e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -18.15617 -12.83383
## sample estimates:
## mean in group 0.5   mean in group 2 
##            10.605            26.100

Conclusion