Tooth Growth data analysis

Loading the dataset and creating sub datasets for further analysis

library(ggplot2)
library(UsingR)
library(dplyr)
data(ToothGrowth)
# Split ToothGrowth into one data frame per supplement/dose combination.
# x, y, z hold the "OJ" rows at doses 0.5, 1 and 2;
# a, b, c hold the "VC" rows at the same three doses.
# Comma-separated conditions in filter() are combined with a logical AND.
x <- ToothGrowth %>% filter(supp == "OJ", dose == 0.5)
y <- ToothGrowth %>% filter(supp == "OJ", dose == 1)
z <- ToothGrowth %>% filter(supp == "OJ", dose == 2)
a <- ToothGrowth %>% filter(supp == "VC", dose == 0.5)
b <- ToothGrowth %>% filter(supp == "VC", dose == 1)
c <- ToothGrowth %>% filter(supp == "VC", dose == 2)

Exploratory Data Analysis

# Per-column summary of the dataset (quartiles and mean for numeric columns,
# level counts for the supp factor).
print(summary(ToothGrowth))
##       len        supp         dose      
##  Min.   : 4.20   OJ:30   Min.   :0.500  
##  1st Qu.:13.07   VC:30   1st Qu.:0.500  
##  Median :19.25           Median :1.000  
##  Mean   :18.81           Mean   :1.167  
##  3rd Qu.:25.27           3rd Qu.:2.000  
##  Max.   :33.90           Max.   :2.000
# Overall mean of the len column, across every supplement and dose.
mn <- mean(ToothGrowth[["len"]])
  • Mean of tooth growth is 18.8133333
# Box plots of tooth length at each dose level, one panel per supplement.
# Columns are referenced by bare name inside aes() so they are looked up in
# the plot's data; writing ToothGrowth$col bypasses that lookup and also
# leaks "ToothGrowth$supp" into the legend title.
g <- ggplot(ToothGrowth, aes(x = factor(dose), y = len))
g <- g + geom_boxplot(aes(fill = supp)) + facet_grid(. ~ supp)
# labs() argument names are case-sensitive: `title` (lower case) sets the
# plot title, whereas `Title` was silently ignored.
g + labs(
  x = "Dose Levels",
  y = "Tooth Length",
  title = "Box Plot to Summarize Data"
)

  • This plot shows that tooth length is higher for higher dosages of vitamin C.
  • Also, for the smaller dosages (0.5 mg and 1 mg), orange juice as the delivery method achieves a better effect.

Confidence intervals formation for tooth growth by supp and dose

  • Creation of confidence interval for OJ supplements and different dose types
# Build a matrix of two-sided 95% t confidence intervals for the mean tooth
# length of each supplement/dose group (one row per group).

# 95% t interval for the mean of `values`, returned as c(lower, upper).
# The sample size and degrees of freedom are taken from the data instead of
# being hard-coded, so the interval stays correct for any group size.
t_interval <- function(values) {
  n <- length(values)
  mean(values) + c(-1, 1) * qt(0.975, df = n - 1) * sd(values) / sqrt(n)
}

# The list names become the row names of the resulting matrix.
len_by_group <- list(
  "OJ-0.5" = x$len,
  "OJ-1"   = y$len,
  "OJ-2"   = z$len,
  "VC-0.5" = a$len,
  "VC-1"   = b$len,
  "VC-2"   = c$len
)

# Compute every interval first and bind once, rather than growing the matrix
# one rbind() at a time.
cimatrix <- do.call(rbind, lapply(len_by_group, t_interval))
colnames(cimatrix) <- c("Lower", "Upper")
cimatrix
##            Lower     Upper
## OJ-0.5 10.039717 16.420283
## OJ-1   19.902273 25.497727
## OJ-2   24.160686 27.959314
## VC-0.5  6.015176  9.944824
## VC-1   14.970657 18.569343
## VC-2   22.707910 29.572090

Hypothesis testing

  • Hypothesis test to determine whether the mean tooth length differs between the two supplements at each dosage. We will use a two-sample t-test with equal variances to test this hypothesis.
# P-values for OJ vs VC at each dose, from an independent two-sample t-test
# with pooled (equal) variance.
#
# paired must be FALSE here: with paired = TRUE, t.test() runs a paired test
# on the row-by-row differences and ignores var.equal. Per the ToothGrowth
# dataset documentation each observation is a different animal, so rows of
# the OJ and VC groups have no pairing.
# NOTE(review): this changes the p-values; re-knit the report to refresh any
# previously printed output.
pooled_p_value <- function(oj, vc) {
  t.test(
    oj$len, vc$len,
    var.equal = TRUE, alternative = "two.sided", paired = FALSE
  )$p.value
}

# Argument names passed to rbind() become the row names of the 3 x 1 matrix.
twosample <- rbind(
  "OJ vs VC 0.5" = pooled_p_value(x, a),
  "OJ vs VC 1"   = pooled_p_value(y, b),
  "OJ vs VC 2"   = pooled_p_value(z, c)
)
colnames(twosample) <- "P-Value of Two Sample T Test"
twosample
##              P-Value of Two Sample T Test
## OJ vs VC 0.5                  0.015472048
## OJ vs VC 1                    0.008229248
## OJ vs VC 2                    0.966956704
  • At lower dosages, i.e. 0.5 mg and 1 mg, orange juice provides more growth than ascorbic acid.
  • At the higher dosage, i.e. 2 mg, there is no statistically significant difference between supplement methods.