Structure: Exploratory data analyses, Summary of the data, Hypothesis testing
# Fix the random seed so that any randomised steps further down are repeatable.
set.seed(12345)
library(datasets)

# Work on a copy of the built-in ToothGrowth data set and print its structure.
data <- datasets::ToothGrowth
str(object = data)
## 'data.frame': 60 obs. of 3 variables:
## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
## $ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
library(ggplot2)
library(gridExtra)
## Loading required package: grid
# Convert dose to a factor so it is plotted as discrete categories.
data[["dose"]] <- factor(data[["dose"]])

# First panel: violin outline of tooth length per dose level with the
# individual observations jittered on top, one facet per supplement type.
# The legend is hidden here.
gplot1 <- ggplot(data, aes(x = dose, y = len)) +
  geom_violin(alpha = 0.5, color = "darkorange3") +
  geom_jitter(
    mapping = aes(color = supp),
    alpha = 0.5,
    position = position_jitter(width = 0.1)
  ) +
  labs(x = "Dose", y = "") +
  facet_grid(. ~ supp) +
  theme(legend.position = "none") +
  scale_colour_brewer(palette = "Set2")
# Reload the original data so that dose is numeric again for this panel.
data <- ToothGrowth

# Second panel: tooth length against numeric dose, coloured and faceted by
# supplement type, with a smoother drawn through each facet. The legend is
# anchored by its bottom-right corner at the given relative position.
gplot2 <- ggplot(data, aes(x = dose, y = len, colour = supp)) +
  geom_point(alpha = 0.5, size = 5) +
  facet_grid(. ~ supp) +
  scale_colour_brewer(palette = "Set2") +
  stat_smooth(method = "auto") +
  theme(
    legend.position = c(1.04, 0),
    legend.justification = c(1, 0)
  ) +
  labs(x = "", y = "") +
  guides(
    colour = guide_legend(
      title = "Supplement",
      override.aes = list(size = 4)
    )
  )
# Draw both panels side by side under a shared title and a shared y-axis label.
# The title is passed as `top`, which is the argument name in gridExtra >= 2.0
# (it replaced `main`). `main` and `just` are not parameters of the current
# grid.arrange()/arrangeGrob(); anything unrecognised ends up in `...`, which
# is reserved for the plots/grobs to arrange, so they are not passed here.
grid.arrange(
  gplot1, gplot2,
  ncol = 2,
  top = "Tooth length by dose based on the supplement type",
  left = "Tooth Length",
  respect = FALSE
)
# Relabel the three columns with readable names, then print the per-column
# summary statistics.
colnames(data) <- c("Tooth length", "Supplement", "Dose")
summary(object = data)
## Tooth length Supplement Dose
## Min. : 4.20 OJ:30 Min. :0.500
## 1st Qu.:13.07 VC:30 1st Qu.:0.500
## Median :19.25 Median :1.000
## Mean :18.81 Mean :1.167
## 3rd Qu.:25.27 3rd Qu.:2.000
## Max. :33.90 Max. :2.000
library(dplyr)
# Start again from the original data set (with its original column names),
# group it by supplement type and dose, and report the mean and standard
# deviation of tooth length within each group.
data <- ToothGrowth
dataSum <- data %>% group_by(supp, dose)
summarise(dataSum, Mean = mean(len), SD = sd(len))
## Source: local data frame [6 x 4]
## Groups: supp
##
## supp dose Mean SD
## 1 OJ 0.5 13.23 4.459709
## 2 OJ 1.0 22.70 3.910953
## 3 OJ 2.0 26.06 2.655058
## 4 VC 0.5 7.98 2.746634
## 5 VC 1.0 16.77 2.515309
## 6 VC 2.0 26.14 4.797731
# H0: The supplement type does not affect tooth growth.
# Ha: The supplement type does affect tooth growth.
# Two-sided, unpaired two-sample t-test with pooled variance (var.equal = TRUE)
# comparing tooth length between the two supplement types, at the 95%
# confidence level. `paired` is deliberately not passed: unpaired is the
# default, and the formula interface of t.test() rejects the argument in
# R >= 4.4.0.
t.test(
  len ~ supp,
  data = data,
  alternative = "two.sided",
  var.equal = TRUE,
  conf.level = 0.95
)
##
## Two Sample t-test
##
## data: len by supp
## t = 1.9153, df = 58, p-value = 0.06039
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1670064 7.5670064
## sample estimates:
## mean in group OJ mean in group VC
## 20.66333 16.96333
# H0: The dosage does not affect tooth growth.
# Ha: The dosage does affect tooth growth.
# Two-sided, unpaired two-sample t-test with pooled variance comparing tooth
# length between the 0.5 and 1.0 dose levels (95% confidence level).
# The row filter is passed explicitly as `subset` instead of positionally, and
# `paired` is omitted: unpaired is the default and the formula interface of
# t.test() rejects the argument in R >= 4.4.0.
t.test(
  len ~ dose,
  data = data,
  subset = dose %in% c(0.5, 1.0),
  alternative = "two.sided",
  var.equal = TRUE,
  conf.level = 0.95
)
##
## Two Sample t-test
##
## data: len by dose
## t = -6.4766, df = 38, p-value = 1.266e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -11.983748 -6.276252
## sample estimates:
## mean in group 0.5 mean in group 1
## 10.605 19.735
# Two-sided, unpaired two-sample t-test with pooled variance comparing tooth
# length between the 1.0 and 2.0 dose levels (95% confidence level).
# The row filter is passed explicitly as `subset` instead of positionally, and
# `paired` is omitted: unpaired is the default and the formula interface of
# t.test() rejects the argument in R >= 4.4.0.
t.test(
  len ~ dose,
  data = data,
  subset = dose %in% c(1.0, 2.0),
  alternative = "two.sided",
  var.equal = TRUE,
  conf.level = 0.95
)
##
## Two Sample t-test
##
## data: len by dose
## t = -4.9005, df = 38, p-value = 1.811e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -8.994387 -3.735613
## sample estimates:
## mean in group 1 mean in group 2
## 19.735 26.100
# Two-sided, unpaired two-sample t-test with pooled variance comparing tooth
# length between the 0.5 and 2.0 dose levels (95% confidence level).
# The row filter is passed explicitly as `subset` instead of positionally, and
# `paired` is omitted: unpaired is the default and the formula interface of
# t.test() rejects the argument in R >= 4.4.0.
t.test(
  len ~ dose,
  data = data,
  subset = dose %in% c(0.5, 2.0),
  alternative = "two.sided",
  var.equal = TRUE,
  conf.level = 0.95
)
##
## Two Sample t-test
##
## data: len by dose
## t = -11.799, df = 38, p-value = 2.838e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -18.15352 -12.83648
## sample estimates:
## mean in group 0.5 mean in group 2
## 10.605 26.100