1 Load the ToothGrowth data and perform some basic exploratory data analyses

# No setwd() call: the analysis shown here uses only the built-in ToothGrowth
# data set and does not read or write any files, so it must not depend on a
# directory that exists on a single machine.
library(ggplot2)
# Column types and a preview of the values in each column.
utils::str(ToothGrowth)
## 'data.frame':    60 obs. of  3 variables:
##  $ len : num  4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
##  $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
##  $ dose: num  0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
# Number of observations (rows) followed by number of variables (columns).
c(nrow(ToothGrowth), ncol(ToothGrowth))
## [1] 60  3

2 Provide a basic summary of the data

# Per-column summary of the whole data set.
summary(object = ToothGrowth)
##       len        supp         dose      
##  Min.   : 4.20   OJ:30   Min.   :0.500  
##  1st Qu.:13.07   VC:30   1st Qu.:0.500  
##  Median :19.25           Median :1.000  
##  Mean   :18.81           Mean   :1.167  
##  3rd Qu.:25.27           3rd Qu.:2.000  
##  Max.   :33.90           Max.   :2.000
# The same summary of tooth length, computed separately per supplement type.
aggregate(len ~ supp, data = ToothGrowth, FUN = summary)
##   supp len.Min. len.1st Qu. len.Median len.Mean len.3rd Qu. len.Max.
## 1   OJ     8.20       15.52      22.70    20.66       25.72    30.90
## 2   VC     4.20       11.20      16.50    16.96       23.10    33.90
# Group sizes: observations per supplement type ...
table(ToothGrowth[["supp"]])
## 
## OJ VC 
## 30 30
# ... and per dose level.
table(ToothGrowth[["dose"]])
## 
## 0.5   1   2 
##  20  20  20
# Standard deviation of tooth length within each supplement group.
tapply(ToothGrowth[["len"]], ToothGrowth[["supp"]], FUN = sd)
##       OJ       VC 
## 6.605561 8.266029

Use ggplot2 to show tooth length by supplement type and by dose.

# Tooth length against dose, drawn as open circles with one panel per
# supplement (rows) and dose (columns) combination.
# Columns are named directly inside aes() so that ggplot2 resolves them in the
# plot data; writing `ToothGrowth$dose` there bypasses that lookup and triggers
# ggplot2's "Use of `ToothGrowth$dose` is discouraged" warning.
ggplot(data = ToothGrowth, aes(x = dose, y = len)) +
  geom_point(shape = 1, size = 5) +
  theme_bw() +
  facet_grid(supp ~ dose, scales = "free", space = "free") +
  labs(
    x = "dose",
    y = "Tooth length by supplements type",
    title = "Tooth growth distribution by supplements type"
  ) +
  # Bold, horizontal facet labels on a lilac strip with a red border.
  theme(
    strip.text.x = element_text(size = 12, face = "bold", angle = 0),
    strip.text.y = element_text(size = 12, face = "bold", angle = 0),
    strip.background = element_rect(colour = "red", fill = "#CCCCFF")
  )

From the plot, tooth growth generally appears to be affected by dose and by supplement type. We will use t-tests to check whether these differences are statistically significant.

# Welch two-sample t-test comparing mean tooth length between the two
# supplement types (unequal variances, independent groups).
# `paired` is deliberately not passed: the formula method of t.test() rejects
# that argument in R >= 4.4.0, and an unpaired test is the default.
t.test(len ~ supp, data = ToothGrowth, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  len by supp
## t = 1.9153, df = 55.309, p-value = 0.06063
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1710156  7.5710156
## sample estimates:
## mean in group OJ mean in group VC 
##         20.66333         16.96333

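The test statistic is t = 1.9153 on about 55 degrees of freedom. The 95% confidence interval for the difference in means (-0.17 to 7.57) includes 0 and the p-value (0.061) is larger than 0.05, so we fail to reject the null hypothesis that mean tooth length is the same for both supplement types. At the 5% level, the data do not show that tooth growth is affected by supplement type.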

3 The grouping factor passed to t.test must have exactly 2 levels, so we compare the three dose levels in pairs: 0.5 with 1.0, 1.0 with 2.0, and 0.5 with 2.0.

# t.test() needs a grouping variable with exactly two levels, so build one
# subset of ToothGrowth for each pair of dose levels.
dose1 <- subset(ToothGrowth, dose %in% c(0.5, 1.0))
dose2 <- subset(ToothGrowth, dose %in% c(1.0, 2.0))
dose3 <- subset(ToothGrowth, dose %in% c(0.5, 2.0))
# Welch two-sample t-tests of tooth length between the two doses in each
# subset. `paired` is deliberately not passed: the formula method of t.test()
# rejects that argument in R >= 4.4.0, and an unpaired test is the default.
# NOTE(review): three pairwise tests are run without a multiple-comparison
# adjustment; confirm that this is intended.
t.test(len ~ dose, data = dose1, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  len by dose
## t = -6.4766, df = 37.986, p-value = 1.268e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -11.983781  -6.276219
## sample estimates:
## mean in group 0.5   mean in group 1 
##            10.605            19.735
t.test(len ~ dose, data = dose2, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  len by dose
## t = -4.9005, df = 37.101, p-value = 1.906e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -8.996481 -3.733519
## sample estimates:
## mean in group 1 mean in group 2 
##          19.735          26.100
t.test(len ~ dose, data = dose3, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  len by dose
## t = -11.799, df = 36.883, p-value = 4.398e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -18.15617 -12.83383
## sample estimates:
## mean in group 0.5   mean in group 2 
##            10.605            26.100

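In all three comparisons the 95% confidence interval for the difference in means excludes 0 and the p-value is smaller than .05, so we reject the null hypothesis that mean tooth length is the same at both doses. Tooth growth is affected by dose: a higher dose is associated with greater tooth length.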

Conclusions

Assumptions