Life spans of wild type vs. transgenic mosquitoes

Example based on Joseph C. Watkins. The life spans (in days) of 88 wild-type and 99 transgenic mosquitoes are given in the following data set.

\(t\)-interval assuming equal variances

# Required package
library(ggplot2)

# Download the life-span table; columns 1 and 2 hold the two samples
mosquitoes <- read.delim(
  file = "https://www.math.arizona.edu/~jwatkins//mosquitoes.txt"
)

# Side-by-side boxplots of the raw columns
# See also: http://rpubs.com/FJRubio/TwoSamplesVT
boxplot(mosquitoes)

# First sample: column 1 with its missing entries removed
X <- mosquitoes[[1]]
X <- X[!is.na(X)]
m <- length(X)
m
## [1] 88
var(X)
## [1] 168.7689
summary(X)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   11.00   18.00   20.78   24.00   52.00
# Second sample: column 2 with its missing entries removed
Y <- mosquitoes[[2]]
Y <- Y[!is.na(Y)]
n <- length(Y)
n
## [1] 99
var(Y)
## [1] 116.2096
summary(Y)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    9.50   16.00   16.55   22.00   50.00
# Violin plots of the two populations
# Build the long-format data directly from the two samples. The samples have
# different sizes, so cbind(X, Y) would recycle the shorter vector and add
# duplicated observations to the plot.
df <- data.frame(
  variable = factor(
    rep(c("X", "Y"), times = c(length(X), length(Y))),
    levels = c("X", "Y")
  ),
  value = c(X, Y)
)
vp <- ggplot(df, aes(x = variable, y = value)) +
  geom_violin(scale = "width", adjust = 1, width = 0.5, fill = "grey80") +
  geom_boxplot(width = 0.075, fatten = 3) +
  theme_bw() +
  xlab("Data") +
  # The vertical axis shows the observed life spans, not a density
  ylab("Life span (days)") +
  theme(
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14)
  ) +
  # Mark each group's mean; `fun` replaces the deprecated `fun.y` argument
  stat_summary(
    fun = mean, colour = c("red", "blue"),
    geom = "point", shape = 18, size = 4
  ) +
  scale_x_discrete(labels = c("Wildtype", "Transgenic"))
vp

# Pooled variance: the two sample variances weighted by (m - 1) and (n - 1)
Sp2 <- ((m - 1) * var(X) + (n - 1) * var(Y)) / (m + n - 2)
Sp2
## [1] 140.9267
# t-interval : equal variance
# qt(0.99, .) leaves 1% in each tail, so this is a two-sided 98% interval
# centred at the difference of the sample means.

Int <- (mean(X) - mean(Y)) +
  c(-1, 1) * qt(0.99, df = m + n - 2) * sqrt(Sp2) * sqrt(1 / m + 1 / n)

library(knitr)
kable(Int, digits = 4)
x
0.1572
8.3201

\(t\)-interval using Welch and Satterthwaite’s approximation

library(knitr)
# Download the life-span table; columns 1 and 2 hold the two samples
mosquitoes <- read.delim(
  file = "https://www.math.arizona.edu/~jwatkins//mosquitoes.txt"
)

# Side-by-side boxplots of the raw columns
boxplot(mosquitoes)

# First sample: column 1 with its missing entries removed
X <- mosquitoes[[1]]
X <- X[!is.na(X)]
m <- length(X)

# Second sample: column 2 with its missing entries removed
Y <- mosquitoes[[2]]
Y <- Y[!is.na(Y)]
n <- length(Y)

# Effective degrees of freedom (Welch-Satterthwaite approximation)
nu <- (var(X) / m + var(Y) / n)^2 /
  (var(X)^2 / (m^2 * (m - 1)) + var(Y)^2 / (n^2 * (n - 1)))
kable(nu, digits = 4)
x
169.6653
# Studentised variance: sum of the per-sample variances of the means
SV <- var(X) / m + var(Y) / n

# t-interval : unequal variances (Welch), two-sided 95% level,
# using the effective degrees of freedom nu

Int <- (mean(X) - mean(Y)) + c(-1, 1) * qt(0.975, df = nu) * sqrt(SV)

kable(Int, digits = 4)
x
0.7676
7.7096
# The same interval obtained directly from t.test()
t.test(x = X, y = Y, conf.level = 0.95)[["conf.int"]]
## [1] 0.7676486 7.7096242
## attr(,"conf.level")
## [1] 0.95

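Both approaches give similar results in this case. Note, however, that the two intervals are not at the same confidence level: the equal-variance interval above uses the 0.99 quantile (a 98% interval), whereas the Welch interval uses the 0.975 quantile (a 95% interval). This is why the second interval is narrower and its lower limit lies farther from zero.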

Test of Significance

Consider testing for a difference in mean life span between the two groups. The hypotheses are

\[H_0: \mu_1 = \mu_2 \quad \text{vs.} \quad H_1: \mu_1 \neq \mu_2\]

library(knitr)
# Download the life-span table; columns 1 and 2 hold the two samples
mosquitoes <- read.delim(
  file = "https://www.math.arizona.edu/~jwatkins//mosquitoes.txt"
)

# Side-by-side boxplots of the raw columns
boxplot(mosquitoes)

# First sample: column 1 with its missing entries removed
X <- mosquitoes[[1]]
X <- X[!is.na(X)]
m <- length(X)

# Second sample: column 2 with its missing entries removed
Y <- mosquitoes[[2]]
Y <- Y[!is.na(Y)]
n <- length(Y)

# Effective degrees of freedom (Welch-Satterthwaite approximation)
nu <- (var(X) / m + var(Y) / n)^2 /
  (var(X)^2 / (m^2 * (m - 1)) + var(Y)^2 / (n^2 * (n - 1)))
kable(nu, digits = 4)
x
169.6653
# Studentised variance: sum of the per-sample variances of the means
SV <- var(X) / m + var(Y) / n

# t-statistic: difference of the sample means in standard-error units
t0 <- (mean(X) - mean(Y)) / sqrt(SV)


# P-value (two-sided): probability in both tails beyond |t0|.
# Taking abs() keeps this valid whatever the sign of t0; the form
# pt(-t0) + 1 - pt(t0) is only correct for t0 >= 0 and returns 2 - p
# (a value above 1) when the statistic is negative.
P <- 2 * pt(-abs(t0), df = nu)
kable(P, digits = 4)
x
0.017
# The same p-value obtained directly from t.test()
t.test(x = X, y = Y, alternative = "two.sided", conf.level = 0.95)
## 
##  Welch Two Sample t-test
## 
## data:  X and Y
## t = 2.4106, df = 169.67, p-value = 0.01699
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.7676486 7.7096242
## sample estimates:
## mean of x mean of y 
##  20.78409  16.54545

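We now want to test whether the transgenic modification reduces the mean life span. With \(\mu_1\) denoting the mean life span of the wild-type mosquitoes and \(\mu_2\) that of the transgenic mosquitoes, the hypotheses are

\[H_0: \mu_1 \leq \mu_2 \quad \text{vs.} \quad H_1: \mu_1 > \mu_2\]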

library(knitr)
# Download the life-span table; columns 1 and 2 hold the two samples
mosquitoes <- read.delim(
  file = "https://www.math.arizona.edu/~jwatkins//mosquitoes.txt"
)

# First sample: column 1 with its missing entries removed
X <- mosquitoes[[1]]
X <- X[!is.na(X)]
m <- length(X)

# Second sample: column 2 with its missing entries removed
Y <- mosquitoes[[2]]
Y <- Y[!is.na(Y)]
n <- length(Y)

# Effective degrees of freedom (Welch-Satterthwaite approximation)
nu <- (var(X) / m + var(Y) / n)^2 /
  (var(X)^2 / (m^2 * (m - 1)) + var(Y)^2 / (n^2 * (n - 1)))
kable(nu, digits = 4)
x
169.6653
# Studentised variance: sum of the per-sample variances of the means
SV <- var(X) / m + var(Y) / n

# t-statistic: difference of the sample means in standard-error units
t0 <- (mean(X) - mean(Y)) / sqrt(SV)


# P-value (one-sided, upper tail): P(T > t0).
# lower.tail = FALSE asks pt() for the upper tail directly, which avoids the
# loss of precision of 1 - pt(t0) when t0 is large and the tail is tiny.
P <- pt(t0, df = nu, lower.tail = FALSE)
kable(P, digits = 4)
x
0.0085
# The same p-value obtained directly from t.test()
t.test(x = X, y = Y, alternative = "greater", conf.level = 0.95)
## 
##  Welch Two Sample t-test
## 
## data:  X and Y
## t = 2.4106, df = 169.67, p-value = 0.008497
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  1.330591      Inf
## sample estimates:
## mean of x mean of y 
##  20.78409  16.54545