Life spans of wild type vs. transgenic mosquitoes
Example based on Joseph C. Watkins. The life span in days of 88 wildtype and 99 transgenic mosquitoes is given in the following data set.
\(t\)-interval assuming equal variances
# Plotting package needed for the violin plots
library(ggplot2)

# Download the life-span table from the course web page
mosquitoes <- read.delim(
  "https://www.math.arizona.edu/~jwatkins//mosquitoes.txt"
)

# Side-by-side boxplots of the columns as a first look at the data
# See also: http://rpubs.com/FJRubio/TwoSamplesVT
boxplot(mosquitoes)

# First sample: the non-missing entries of column 1, and its size
X <- mosquitoes[[1]][!is.na(mosquitoes[[1]])]
m <- length(X)
m
## [1] 88
var(X)
## [1] 168.7689
summary(X)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 11.00 18.00 20.78 24.00 52.00
# Second sample: the non-missing entries of column 2, and its size
Y <- mosquitoes[[2]][!is.na(mosquitoes[[2]])]
n <- length(Y)
n
## [1] 99
var(Y)
## [1] 116.2096
summary(Y)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 9.50 16.00 16.55 22.00 50.00
# Violin plots of the two populations
# Stack the two samples in long format (one row per observation). The samples
# have different sizes, so they must not be combined with cbind(): that
# recycles the shorter vector and adds duplicated observations to its violin,
# boxplot and mean.
df <- data.frame(
  variable = factor(
    rep(c("X", "Y"), times = c(length(X), length(Y))),
    levels = c("X", "Y")
  ),
  value = c(X, Y)
)
vp <- ggplot(df, aes(x = variable, y = value)) +
  geom_violin(scale = "width", adjust = 1, width = 0.5, fill = "grey80") +
  geom_boxplot(width = 0.075, fatten = 3) +
  theme_bw() +
  xlab("Data") +
  # The vertical axis shows the observed life spans, not a density
  ylab("Life span (days)") +
  theme(
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14)
  ) +
  theme(
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14)
  ) +
  # Mark each sample mean; `fun` replaces the deprecated `fun.y` argument
  stat_summary(
    fun = mean, colour = c("red", "blue"),
    geom = "point", shape = 18, size = 4
  ) +
  scale_x_discrete(labels = c("Wildtype", "Transgenic"))
vp

# Pooled variance: the two sample variances weighted by their degrees of freedom
pooled_df <- m + n - 2
Sp2 <- ((m - 1) * var(X) + (n - 1) * var(Y)) / pooled_df
Sp2
## [1] 140.9267
# t-interval : equal variance
# qt(0.99, ...) leaves 1% in each tail, so this is a two-sided 98% interval
mean_diff <- mean(X) - mean(Y)
margin <- qt(0.99, df = pooled_df) * sqrt(Sp2) * sqrt(1 / m + 1 / n)
Int <- c(mean_diff - margin, mean_diff + margin)
library(knitr)
kable(Int, digits = 4)
\(t\)-interval using Welch and Satterthwaite’s approximation
library(knitr)
# Download the life-span table again so this section runs on its own
mosquitoes <- read.delim(
  "https://www.math.arizona.edu/~jwatkins//mosquitoes.txt"
)
# Boxplots of the columns
boxplot(mosquitoes)

# First sample: non-missing entries of column 1
X <- mosquitoes[[1]][!is.na(mosquitoes[[1]])]
m <- length(X)
# Second sample: non-missing entries of column 2
Y <- mosquitoes[[2]][!is.na(mosquitoes[[2]])]
n <- length(Y)
# Effective degrees of freedom (Welch-Satterthwaite approximation)
welch_num <- (var(X) / m + var(Y) / n)^2
welch_den <- var(X)^2 / (m^2 * (m - 1)) + var(Y)^2 / (n^2 * (n - 1))
nu <- welch_num / welch_den
kable(nu, digits = 4)
# Estimated variance of the difference in sample means (no pooling)
SV <- var(X) / m + var(Y) / n
# t-interval : unequal variances (Welch), two-sided 95%
mean_diff <- mean(X) - mean(Y)
margin <- qt(0.975, df = nu) * sqrt(SV)
Int <- c(mean_diff - margin, mean_diff + margin)
kable(Int, digits = 4)
# The same interval obtained directly from t.test()
t.test(X, Y, conf.level = 0.95)$conf.int
## [1] 0.7676486 7.7096242
## attr(,"conf.level")
## [1] 0.95
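Both approaches give similar results in this case, although the second approach produces a confidence interval that is farther from zero. Note, however, that the two intervals are not computed at the same confidence level: the equal-variance interval above uses the quantile `qt(0.99, ...)` (a 98% interval), whereas the Welch interval uses `qt(0.975, ...)` (a 95% interval), so part of the difference between them is due to the confidence level rather than to the method.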
Test of Significance
Consider testing for a difference in mean life span between the wildtype (\(\mu_1\)) and transgenic (\(\mu_2\)) populations. This is
\[H_0: \mu_1 = \mu_2 \,\,\,\,\,\, vs. \,\,\,\,\,\, H_1: \mu_1 \neq \mu_2\]
library(knitr)
# Download the life-span table again so this section runs on its own
mosquitoes <- read.delim(
  "https://www.math.arizona.edu/~jwatkins//mosquitoes.txt"
)
# Boxplots of the columns
boxplot(mosquitoes)

# First sample: non-missing entries of column 1
X <- mosquitoes[[1]][!is.na(mosquitoes[[1]])]
m <- length(X)
# Second sample: non-missing entries of column 2
Y <- mosquitoes[[2]][!is.na(mosquitoes[[2]])]
n <- length(Y)
# Effective degrees of freedom (Welch-Satterthwaite approximation)
nu <- (var(X) / m + var(Y) / n)^2 /
  (var(X)^2 / (m^2 * (m - 1)) + var(Y)^2 / (n^2 * (n - 1)))
kable(nu, digits = 4)
# Estimated variance of the difference in sample means
SV <- var(X) / m + var(Y) / n
# t-statistic
t0 <- (mean(X) - mean(Y)) / sqrt(SV)
# Two-sided P-value: twice the tail area beyond |t0|. Taking abs() keeps the
# result valid when t0 is negative (e.g. if the samples are swapped); the
# form pt(-t0) + 1 - pt(t0) would then exceed 1.
P <- 2 * pt(-abs(t0), df = nu)
kable(P, digits = 4)
# Calculating the p-value using the command t.test
t.test(X, Y, conf.level = 0.95, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: X and Y
## t = 2.4106, df = 169.67, p-value = 0.01699
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.7676486 7.7096242
## sample estimates:
## mean of x mean of y
## 20.78409 16.54545
We now want to test if the transgenic modification of the mosquitoes reduces the mean life span. This is
\[H_0: \mu_1 \leq \mu_2 \,\,\,\,\,\, vs. \,\,\,\,\,\, H_1: \mu_1 > \mu_2\]
library(knitr)
# Download the life-span table again so this section runs on its own
mosquitoes <- read.delim(
  "https://www.math.arizona.edu/~jwatkins//mosquitoes.txt"
)
# First sample: non-missing entries of column 1
X <- mosquitoes[[1]][!is.na(mosquitoes[[1]])]
m <- length(X)
# Second sample: non-missing entries of column 2
Y <- mosquitoes[[2]][!is.na(mosquitoes[[2]])]
n <- length(Y)
# Effective degrees of freedom (Welch-Satterthwaite approximation)
nu <- (var(X) / m + var(Y) / n)^2 /
  (var(X)^2 / (m^2 * (m - 1)) + var(Y)^2 / (n^2 * (n - 1)))
kable(nu, digits = 4)
# Estimated variance of the difference in sample means
SV <- var(X) / m + var(Y) / n
# t-statistic
t0 <- (mean(X) - mean(Y)) / sqrt(SV)
# One-sided P-value: upper-tail area beyond t0. Asking pt() for the upper
# tail directly avoids the loss of precision in 1 - pt(t0) when t0 is large.
P <- pt(t0, df = nu, lower.tail = FALSE)
kable(P, digits = 4)
# Calculating the p-value using the command t.test
t.test(X, Y, conf.level = 0.95, alternative = "greater")
##
## Welch Two Sample t-test
##
## data: X and Y
## t = 2.4106, df = 169.67, p-value = 0.008497
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 1.330591 Inf
## sample estimates:
## mean of x mean of y
## 20.78409 16.54545