The Bootstrapped two-sample test

#### The Bootstrapped two-sample-test 
# Version 1.2 beta
## R. Schwamborn, March 2022

##  Short description ------------

#  This script provides three simple functions to calculate a "p" value for the difference in  means or medians. 
#  The "Bootstrapped two-sample-test" gives a non-parametric two-sided "p" value (we do not assume normal distribution), based on the bootstrap posteriors for the estimate (mean or median).
#  Inputs:  Two original data sets "A" and "B", or two bootstrap posteriors from  samples: "A" and "B" (see functions below).
#  You may use mean or median. Instead of mean(X), you may also use  - median(X), just replace. 
#  Finally, a commont-test is also presented to verify the result, for the simple example below.

##   Three  practical functions are provided:

# a.)  "orig.data.boot.median.two.sample.test()": gives  a "p" value for the difference in medians for two samples (smaller one first), based on two original data sets 

# b.)  "orig.data.boot.mean.two.sample.test()": gives a "p" value for the differnce in mean  for two samples (smaller one first), based two original data sets  

# c.)   "boot.p.two.sample.diff.posteriors()"" : gives a "p" value for the difference between  central estimates (mean or median) of two samples (smaller one first), based on two bootstrap posteriors (inputs: two posteriors for mean,  or two posteriors for median)


## 1. Example -------------

# 1.a data used for Example ------------

 A.set <- rnorm (100, mean = 10, sd = 9) #  data set "A"
mean(A.set)

## [1] 9.209861

sd(A.set)

## [1] 9.529414

 B.set <- A.set + 3 #  data set "B", has larger values
mean(B.set)

## [1] 12.20986

sd(B.set)

## [1] 9.529414

(diffAB <- mean(B.set) -mean(A.set)) # difference, to be tested

## [1] 3

t.test(A.set, B.set) # common t-test (test for diffence in means)

## 
##  Welch Two Sample t-test
## 
## data:  A.set and B.set
## t = -2.2261, df = 198, p-value = 0.02714
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.6576144 -0.3423856
## sample estimates:
## mean of x mean of y 
##  9.209861 12.209861

# 1.b. generate a posterior for sample A ------ ---------

Boot.runs = 20000 # number of Bootstrap runs (for A and B)

test.samA <- A.set


n = length(test.samA) # sample size (300)


boot.samplesA = matrix(sample(test.samA, size = Boot.runs * n, 
                              replace = TRUE),
                       Boot.runs, n)
# generates a matrix with 10000 lines and 3000 columns (samples)

dim(boot.samplesA) # number of rows and columns

## [1] 20000   100

## [1] 10000    100

#View(as.data.frame(boot.samples))

boot.statisticsA = apply(boot.samplesA, 1, mean)
# calculates the mean for each line (i.e., for each sampe)
# gives a vector with  10000 means

mean(boot.statisticsA) # overall mean of means: -0.03081875

## [1] 9.211802

sd (boot.statisticsA) # 0.10326

## [1] 0.947432

# 1.c. generate posterior for sample B -------- --------------


test.samB <- B.set


n = length(test.samB) # sample size (300)


boot.samplesB = matrix(sample(test.samB, size = Boot.runs * n, 
                              replace = TRUE),
                       Boot.runs, n)
# generates a matrix with 10000 lines and 3000 columns (samples)

dim(boot.samplesB) # number of rows and columns

## [1] 20000   100

## [1] 10000    100

#View(as.data.frame(boot.samples))

boot.statisticsB = apply(boot.samplesB, 1, mean)
# calculates the mean for each line (i.e., for each sampe)
# gives a vector with  10000 means



mean(boot.statisticsA) # overall mean of posterior for means:

## [1] 9.211802

sd (boot.statisticsA) # std. dev. of posterior for means = std. error for the mean

## [1] 0.947432

sd(test.samA)/sqrt(length(test.samA))# parametric  std. error for the mean

## [1] 0.9529414

mean(boot.statisticsB) # overall mean of posterior for means:

## [1] 12.21226

sd (boot.statisticsB) # std. dev. of posterior for means = std. error for the mean

## [1] 0.9501579

sd(test.samB)/sqrt(length(test.samB))# parametric  std. error for the mean

## [1] 0.9529414

# 1.d  difference between posteriors ------------

boot.statisticsDiff <- boot.statisticsB - boot.statisticsA

mean(boot.statisticsDiff) # overall mean of diff: 0.37291

## [1] 3.000459

sd (boot.statisticsDiff) # sd of diff: 0.1547

## [1] 1.342625

# 1.e. 95% confid. interval for diff. -----------

# Non-parametr. ordinary Bootstrap (no assumptions, uses quantiles):
quantile(boot.statisticsDiff, c(0.025, 0.975)) # correct 95% quantiles OK!

##      2.5%     97.5% 
## 0.3685648 5.5984869

# 1.f calculate "p" value  from bootstrap diff.  ----------


length(boot.statisticsDiff [boot.statisticsDiff< 0]) # 7 are below 0

## [1] 262

N.below.zero <- length(boot.statisticsDiff [boot.statisticsDiff< 0]) # 7 are below 0 

N.total <- length(boot.statisticsDiff ) # all 

( one.sided.p_value <- N.below.zero / N.total) # one-sided "p" value

## [1] 0.0131

(two.sided.p_value.boot <- 2* (N.below.zero / N.total)) # two-sided "p" value

## [1] 0.0262

two.sided.p_value.boot # "p" value for difference in means (from bootsrapping)

## [1] 0.0262

# 1.h compare this "p" value with t-test result (gives approx. the same "p" value)----

t.test(A.set, B.set) # two-sided "p" value from t-test

## 
##  Welch Two Sample t-test
## 
## data:  A.set and B.set
## t = -2.2261, df = 198, p-value = 0.02714
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.6576144 -0.3423856
## sample estimates:
## mean of x mean of y 
##  9.209861 12.209861

p.result.t.test <- t.test(A.set, B.set)
p.result.t.test$p.value  #two-sided "p" value from t-test

## [1] 0.02713655

#compare t-test with bootsrap result

two.sided.p_value.boot # "p" value from bootstrap,for difference in means

## [1] 0.0262

p.result.t.test$p.value # "p" value from t-test, for difference in means

## [1] 0.02713655

## 2. Functions --------------

#   Three   practical functions are provided:

# 2.a)    orig.data.boot.mean.two.sample.test -------------------
# provides a two-sided "p" value for the difference in mean for two samples 
# Important: smaller sample  first
# Uses the two  original data sets for camparisons of two means


orig.data.boot.mean.two.sample.test <- function (A.inp, B.inp, Boot_runs = 10000) {
  
  # generate posterior for sample A 
  
  Boot.runs = Boot_runs # number of Bootstrap runs (for A and B)
  
  test.samA <- A.inp
  
  
  n = length(test.samA) # sample size (300)
  
  
  boot.samplesA = matrix(sample(test.samA, size = Boot.runs * n, 
                                replace = TRUE),
                         Boot.runs, n)
  # generates a matrix with 10000 lines and 3000 columns (samples)
  
  dim(boot.samplesA) # number of rows and columns
  
  ## [1] 10000    100
  
  #View(as.data.frame(boot.samples))
  
  boot.statisticsA = apply(boot.samplesA, 1, mean)
  # calculates the mean for each line (i.e., for each sampe)
  # gives a vector with  10000 means
  
  mean(boot.statisticsA) # overall mean of means: -0.03081875
  sd (boot.statisticsA) # 0.10326
  
  
  # generate posterior for sample B 
  
  
  test.samB <- B.inp
  
  
  n = length(test.samB) # sample size (300)
  
  
  boot.samplesB = matrix(sample(test.samB, size = Boot.runs * n, 
                                replace = TRUE),
                         Boot.runs, n)
  # generates a matrix with 10000 lines and 3000 columns (samples)
  
  dim(boot.samplesB) # number of rows and columns
  
  ## [1] 10000    100
  
  #View(as.data.frame(boot.samples))
  
  boot.statisticsB = apply(boot.samplesB, 1, mean)
  # calculates the mean for each line (i.e., for each sampe)
  # gives a vector with  10000 means
  
  
  mean(boot.statisticsA) # overall mean of means: -0.03081875
  sd (boot.statisticsA) # 0.10326
  
  
  mean(boot.statisticsB) # overall mean of means: 0.340888
  sd (boot.statisticsB) # 0.103
  
  
  # difference between posteriors ------------
  
  boot.statisticsDiff <- boot.statisticsB - boot.statisticsA
  
  mean(boot.statisticsDiff) # overall mean of diff: 0.37291
  sd (boot.statisticsDiff) # sd of diff: 0.1547
  
  # conf int. for diff.:
  
  # Non-parametr. ordinary Bootstrap (no assumptions, uses quantiles):
 # quantile(boot.statisticsDiff, c(0.025, 0.975)) # correct 95% quantiles OK!
  
  # calculate "p" value  from bootstrap diff.  ----------
  
  length(boot.statisticsDiff [boot.statisticsDiff< 0]) # 7 are below 0 
  
  N.below.zero <- length(boot.statisticsDiff [boot.statisticsDiff< 0]) # 7 are below 0 
  
  N.total <- length(boot.statisticsDiff ) # all 
  
  ( one.sided.p_value <- N.below.zero / N.total) # one-sided "p" value
  
  (two.sided.p_value <- 2* (N.below.zero / N.total)) # two-sided "p" value
  
  c("two.sided.p_value", two.sided.p_value)  # output
  
  
}

orig.data.boot.mean.two.sample.test(A.set, B.set , Boot_runs = 50000)

## [1] "two.sided.p_value" "0.02404"

t.test(A.set, B.set)

## 
##  Welch Two Sample t-test
## 
## data:  A.set and B.set
## t = -2.2261, df = 198, p-value = 0.02714
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.6576144 -0.3423856
## sample estimates:
## mean of x mean of y 
##  9.209861 12.209861

# 2.b.)   orig.data.boot.median.two.sample.test --------------

# Provides a two-sided "p" value for the difference in medians for two samples 
# Important: smaller sample  first
# Uses the two  original data sets for camparisons of two medians


orig.data.boot.median.two.sample.test <- function (A.inp, B.inp, Boot_runs = 10000) {
  
  # generate posterior for sample A 
  
  Boot.runs = Boot_runs # number of Bootstrap runs (for A and B)
  
  test.samA <- A.inp
  
  
  n = length(test.samA) # sample size (300)
  
  
  boot.samplesA = matrix(sample(test.samA, size = Boot.runs * n, 
                                replace = TRUE),
                         Boot.runs, n)
  # generates a matrix with 10000 lines and 3000 columns (samples)
  
  dim(boot.samplesA) # number of rows and columns
  
  ## [1] 10000    100
  
  #View(as.data.frame(boot.samples))
  
  boot.statisticsA = apply(boot.samplesA, 1, median)
  # calculates the median for each line (i.e., for each sampe)
  # gives a vector with  10000 medians
  
  median(boot.statisticsA) # overall median of medians: -0.03081875
  sd (boot.statisticsA) # 0.10326
  
  
  # generate posterior for sample B 
  
  
  test.samB <- B.inp
  
  
  n = length(test.samB) # sample size (300)
  
  
  boot.samplesB = matrix(sample(test.samB, size = Boot.runs * n, 
                                replace = TRUE),
                         Boot.runs, n)
  # generates a matrix with 10000 lines and 3000 columns (samples)
  
  dim(boot.samplesB) # number of rows and columns
  
  ## [1] 10000    100
  
  #View(as.data.frame(boot.samples))
  
  boot.statisticsB = apply(boot.samplesB, 1, median)
  # calculates the median for each line (i.e., for each sampe)
  # gives a vector with  10000 medians
  
  
  
  median(boot.statisticsA) # overall median of medians: -0.03081875
  sd (boot.statisticsA) # 0.10326
  
  
  median(boot.statisticsB) # overall median of medians: 0.340888
  sd (boot.statisticsB) # 0.103
  
  
  # difference between posteriors ------------
  
  boot.statisticsDiff <- boot.statisticsB - boot.statisticsA
  
  median(boot.statisticsDiff) # overall median of diff: 0.37291
  sd (boot.statisticsDiff) # sd of diff: 0.1547
  
  # conf int. for diff.:
  
  # Non-parametr. ordinary Bootstrap (no assumptions, uses quantiles):
 # quantile(boot.statisticsDiff, c(0.025, 0.975)) # correct 95% quantiles OK!
  
  # calculate "p" value  from bootstrap diff.  ----------
  
  length(boot.statisticsDiff [boot.statisticsDiff< 0]) # 7 are below 0 
  
  N.below.zero <- length(boot.statisticsDiff [boot.statisticsDiff< 0]) # 7 are below 0 
  
  N.total <- length(boot.statisticsDiff ) # all 
  
  ( one.sided.p_value <- N.below.zero / N.total) # one-sided "p" value
  
  (two.sided.p_value <- 2* (N.below.zero / N.total)) # two-sided "p" value
  
  c("two.sided.p_value", two.sided.p_value) # output
  
  
}


# "p" value of the bootstrap median test
(p.diff.median.boot <- orig.data.boot.median.two.sample.test(A.set, B.set , Boot_runs = 20000))

## [1] "two.sided.p_value" "0.0867"

# 2.c.)   boot.p.two.sample.diff.posteriors -------------------

# Provides a two-sided "p" value for the difference in mean (or medians) for two samples 
# Important: smaller sample  first
# Uses two  posterior distributions (for means or for medians)  


boot.p.two.sample.diff.posteriors <- function (boot.post.A, boot.post.B) {
  
  # difference between posteriors
  
   boot.statisticsA <- boot.post.A # posterior for sample A
   boot.statisticsB <- boot.post.B # posterior for sample B
  
  
  boot.statisticsDiff <- boot.statisticsB - boot.statisticsA
  
 
  # calculate "p" value  from bootstrap diff.  
  
  N.below.zero <- length(boot.statisticsDiff [boot.statisticsDiff< 0]) # 7 are below 0 
  
  N.total <- length(boot.statisticsDiff ) # all 
  
  ( one.sided.p_value <- N.below.zero / N.total) # one-sided "p" value
  
  (two.sided.p_value <- 2* (N.below.zero / N.total)) # two-sided "p" value
  
  c("two.sided.p_value", two.sided.p_value)  # output
  
}

boot.p.two.sample.diff.posteriors(boot.statisticsA, boot.statisticsB )

## [1] "two.sided.p_value" "0.0262"

The Bootstrapped two-sample test

Ralf Schwamborn

March 20, 2022