Load Libraries
# Load libraries
library(datasets)
library(ggplot2)
library(pastecs)
library(dplyr)
Now in the second portion of the project, we’re going to analyze the ToothGrowth data in the R datasets package.
# Load the ToothGrowth data set shipped with the datasets package and
# show its structure (column names, types and the first few values).
data("ToothGrowth", package = "datasets")
str(object = ToothGrowth)
## 'data.frame': 60 obs. of 3 variables:
## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
## $ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
# Preview the first six rows of the data
head(ToothGrowth, n = 6L)
## len supp dose
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5
# Histogram of tooth length over all observations.
# `breaks = 20` is only a suggested number of cells; hist() rounds it to
# "pretty" break points.
hist(
  ToothGrowth$len,
  breaks = 20,
  col = "red",
  main = "Tooth Growth Length in Guinea Pigs",
  xlab = "Tooth Length",
  ylab = "Frequency"
)
Below are descriptive statistics for the ToothGrowth data.
The conditioning plot shown after the summaries was provided as part of the documentation for the ToothGrowth dataset.
# Descriptive statistics per column (pastecs); the factor column `supp`
# yields NA for every statistic.
pastecs::stat.desc(ToothGrowth)
## len supp dose
## nbr.val 60.0000000 NA 60.00000000
## nbr.null 0.0000000 NA 0.00000000
## nbr.na 0.0000000 NA 0.00000000
## min 4.2000000 NA 0.50000000
## max 33.9000000 NA 2.00000000
## range 29.7000000 NA 1.50000000
## sum 1128.8000000 NA 70.00000000
## median 19.2500000 NA 1.00000000
## mean 18.8133333 NA 1.16666667
## SE.mean 0.9875223 NA 0.08118705
## CI.mean.0.95 1.9760276 NA 0.16245491
## var 58.5120226 NA 0.39548023
## std.dev 7.6493152 NA 0.62887219
## coef.var 0.4065901 NA 0.53903330
# Five-number summary and mean for the numeric columns, counts for `supp`
summary(object = ToothGrowth)
## len supp dose
## Min. : 4.20 OJ:30 Min. :0.500
## 1st Qu.:13.07 VC:30 1st Qu.:0.500
## Median :19.25 Median :1.000
## Mean :18.81 Mean :1.167
## 3rd Qu.:25.27 3rd Qu.:2.000
## Max. :33.90 Max. :2.000
###
# Chart provided in the ToothGrowth data summary document:
# tooth length against dose, one panel per supplement type, with a
# smoothed trend line drawn in each panel.
coplot(
  formula = len ~ dose | supp,
  data = ToothGrowth,
  panel = panel.smooth,
  xlab = "ToothGrowth data: length vs dose, given type of supplement"
)
# Disabled reshape to wide format; dcast() is presumably reshape2::dcast,
# which is not among the packages loaded at the top of this report.
# toothgDW <- dcast(ToothGrowth, dose + len ~ supp, value.var = "len")
A t confidence interval was used to analyze the difference between the two groups of guinea pigs, supplied with Vitamin C (VC) and orange juice (OJ) respectively.
library(ggplot2)
library(dplyr)
# Add a running index within each supplement group (1, 2, ... per supplement).
# The previous ifelse() had identical branches and relied on recycling a
# hard-coded seq(30) over all rows; ave() states the intent directly and does
# not depend on the group size.
# NOTE(review): each guinea pig received only one supplement, so equal indices
# in the two groups do not denote the same animal; the connecting lines in the
# first plot are a visual aid only, not a pairing.
ToothGrowth <- mutate(
  ToothGrowth,
  groupID = ave(seq_along(supp), supp, FUN = seq_along)
)
# Both columns are used as categorical variables by the plots below
ToothGrowth$groupID <- as.factor(ToothGrowth$groupID)
ToothGrowth$dose <- as.factor(ToothGrowth$dose)

# Plot 1: tooth length by supplement, one line per index
g <- ggplot(ToothGrowth, aes(x = supp, y = len, group = groupID))
g <- g + geom_line(size = 1, aes(colour = groupID)) +
  geom_point(size = 10, pch = 21, fill = "blue", alpha = .5)
g <- g + ggtitle("Tooth Growth Length based on suplement")
g

# Plot 2: box plot of tooth length per dose, one facet per supplement
g <- ggplot(ToothGrowth, aes(x = dose, y = len, group = dose))
g <- g + geom_boxplot(size = 1, aes(colour = dose)) + facet_grid(. ~ supp)
g <- g + ggtitle("Tooth Growth Length based on dosage")
g
Calculating the t confidence interval by hand (pooled-variance formula) and with the R function t.test.
# Create the two independent groups: tooth length by supplement type
g1 <- ToothGrowth$len[ToothGrowth$supp == "VC"]
g2 <- ToothGrowth$len[ToothGrowth$supp == "OJ"]

# 95% t confidence interval for mean(OJ) - mean(VC), assuming equal variances
n1 <- length(g1)
n2 <- length(g2)
# Pooled standard deviation of the two groups
sp <- sqrt(((n1 - 1) * var(g1) + (n2 - 1) * var(g2)) / (n1 + n2 - 2))
md <- mean(g2) - mean(g1)
# Standard error of the difference in means
semd <- sp * sqrt(1 / n1 + 1 / n2)
# Row 1: interval computed by hand; row 2: interval reported by t.test().
# The pooled-variance t statistic has n1 + n2 - 2 degrees of freedom (one is
# lost per estimated group mean), so both rows must agree.
ci_compare <- rbind(
  md + c(-1, 1) * qt(0.975, df = n1 + n2 - 2) * semd,
  t.test(g2, g1, paired = FALSE, var.equal = TRUE)$conf.int
)
ci_compare
## [,1] [,2]
## [1,] -0.1670064 7.567006
## [2,] -0.1670064 7.567006
Further analysis to identify variations based on a specific pair of doses.
# One subset per pair of dose levels (0.5 vs 1, 0.5 vs 2, 1 vs 2)
TG01 <- subset(ToothGrowth, dose %in% c(0.5, 1))
TG02 <- subset(ToothGrowth, dose %in% c(0.5, 2))
TG12 <- subset(ToothGrowth, dose %in% c(1, 2))
# 95% equal-variance t confidence intervals for the difference in mean tooth
# length (lower dose minus higher dose), one row per pair.
# `paired` is not passed: the formula method of t.test() treats the groups as
# independent by default and rejects the argument in R >= 4.4.
dose_ci <- rbind(
  t.test(len ~ dose, var.equal = TRUE, data = TG01)$conf.int,
  t.test(len ~ dose, var.equal = TRUE, data = TG02)$conf.int,
  t.test(len ~ dose, var.equal = TRUE, data = TG12)$conf.int
)
dose_ci
## [,1] [,2]
## [1,] -11.983748 -6.276252
## [2,] -18.153519 -12.836481
## [3,] -8.994387 -3.735613
The sample population is considered to be independent and randomly selected. A total of 60 guinea pigs were independently selected and split into two main groups based on supplement type, OJ or VC. Each guinea pig was administered only one specific dose (0.5, 1, or 2 mg) of either OJ or VC.
The confidence interval computed by hand from the sample agrees closely with the one obtained using the R function t.test. Because the 95% interval for the difference between supplement types (OJ minus VC) contains zero, the supplement type does not seem to have much impact on tooth growth. In contrast, every interval comparing two dose levels lies entirely below zero, so an increase in dosage seems to have a direct impact on tooth growth. A new analysis with a larger number of observations would be recommended (bring more guinea pigs).