#### The Bootstrapped two-sample-test
# Version 1.2 beta
## R. Schwamborn, March 2022
## Short description ------------
# This script provides three simple functions to calculate a "p" value for the difference in means or medians.
# The "Bootstrapped two-sample-test" gives a non-parametric two-sided "p" value (we do not assume normal distribution), based on the bootstrap posteriors for the estimate (mean or median).
# Inputs: Two original data sets "A" and "B", or two bootstrap posteriors from samples: "A" and "B" (see functions below).
# You may use mean or median. Instead of mean(X), you may also use - median(X), just replace.
# Finally, a commont-test is also presented to verify the result, for the simple example below.
## Three practical functions are provided:
# a.) "orig.data.boot.median.two.sample.test()": gives a "p" value for the difference in medians for two samples (smaller one first), based on two original data sets
# b.) "orig.data.boot.mean.two.sample.test()": gives a "p" value for the differnce in mean for two samples (smaller one first), based two original data sets
# c.) "boot.p.two.sample.diff.posteriors()"" : gives a "p" value for the difference between central estimates (mean or median) of two samples (smaller one first), based on two bootstrap posteriors (inputs: two posteriors for mean, or two posteriors for median)
## 1. Example -------------
# 1.a data used for Example ------------
A.set <- rnorm (100, mean = 10, sd = 9) # data set "A"
mean(A.set)
## [1] 9.209861
sd(A.set)
## [1] 9.529414
B.set <- A.set + 3 # data set "B", has larger values
mean(B.set)
## [1] 12.20986
sd(B.set)
## [1] 9.529414
(diffAB <- mean(B.set) -mean(A.set)) # difference, to be tested
## [1] 3
t.test(A.set, B.set) # common t-test (test for diffence in means)
##
## Welch Two Sample t-test
##
## data: A.set and B.set
## t = -2.2261, df = 198, p-value = 0.02714
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -5.6576144 -0.3423856
## sample estimates:
## mean of x mean of y
## 9.209861 12.209861
# 1.b. generate a posterior for sample A ------ ---------
Boot.runs = 20000 # number of Bootstrap runs (for A and B)
test.samA <- A.set
n = length(test.samA) # sample size (300)
boot.samplesA = matrix(sample(test.samA, size = Boot.runs * n,
replace = TRUE),
Boot.runs, n)
# generates a matrix with 10000 lines and 3000 columns (samples)
dim(boot.samplesA) # number of rows and columns
## [1] 20000 100
## [1] 10000 100
#View(as.data.frame(boot.samples))
boot.statisticsA = apply(boot.samplesA, 1, mean)
# calculates the mean for each line (i.e., for each sampe)
# gives a vector with 10000 means
mean(boot.statisticsA) # overall mean of means: -0.03081875
## [1] 9.211802
sd (boot.statisticsA) # 0.10326
## [1] 0.947432
# 1.c. generate posterior for sample B -------- --------------
test.samB <- B.set
n = length(test.samB) # sample size (300)
boot.samplesB = matrix(sample(test.samB, size = Boot.runs * n,
replace = TRUE),
Boot.runs, n)
# generates a matrix with 10000 lines and 3000 columns (samples)
dim(boot.samplesB) # number of rows and columns
## [1] 20000 100
## [1] 10000 100
#View(as.data.frame(boot.samples))
boot.statisticsB = apply(boot.samplesB, 1, mean)
# calculates the mean for each line (i.e., for each sampe)
# gives a vector with 10000 means
mean(boot.statisticsA) # overall mean of posterior for means:
## [1] 9.211802
sd (boot.statisticsA) # std. dev. of posterior for means = std. error for the mean
## [1] 0.947432
sd(test.samA)/sqrt(length(test.samA))# parametric std. error for the mean
## [1] 0.9529414
mean(boot.statisticsB) # overall mean of posterior for means:
## [1] 12.21226
sd (boot.statisticsB) # std. dev. of posterior for means = std. error for the mean
## [1] 0.9501579
sd(test.samB)/sqrt(length(test.samB))# parametric std. error for the mean
## [1] 0.9529414
# 1.d difference between posteriors ------------
boot.statisticsDiff <- boot.statisticsB - boot.statisticsA
mean(boot.statisticsDiff) # overall mean of diff: 0.37291
## [1] 3.000459
sd (boot.statisticsDiff) # sd of diff: 0.1547
## [1] 1.342625
# 1.e. 95% confid. interval for diff. -----------
# Non-parametr. ordinary Bootstrap (no assumptions, uses quantiles):
quantile(boot.statisticsDiff, c(0.025, 0.975)) # correct 95% quantiles OK!
## 2.5% 97.5%
## 0.3685648 5.5984869
# 1.f calculate "p" value from bootstrap diff. ----------
length(boot.statisticsDiff [boot.statisticsDiff< 0]) # 7 are below 0
## [1] 262
N.below.zero <- length(boot.statisticsDiff [boot.statisticsDiff< 0]) # 7 are below 0
N.total <- length(boot.statisticsDiff ) # all
( one.sided.p_value <- N.below.zero / N.total) # one-sided "p" value
## [1] 0.0131
(two.sided.p_value.boot <- 2* (N.below.zero / N.total)) # two-sided "p" value
## [1] 0.0262
two.sided.p_value.boot # "p" value for difference in means (from bootsrapping)
## [1] 0.0262
# 1.h compare this "p" value with t-test result (gives approx. the same "p" value)----
t.test(A.set, B.set) # two-sided "p" value from t-test
##
## Welch Two Sample t-test
##
## data: A.set and B.set
## t = -2.2261, df = 198, p-value = 0.02714
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -5.6576144 -0.3423856
## sample estimates:
## mean of x mean of y
## 9.209861 12.209861
p.result.t.test <- t.test(A.set, B.set)
p.result.t.test$p.value #two-sided "p" value from t-test
## [1] 0.02713655
#compare t-test with bootsrap result
two.sided.p_value.boot # "p" value from bootstrap,for difference in means
## [1] 0.0262
p.result.t.test$p.value # "p" value from t-test, for difference in means
## [1] 0.02713655
## 2. Functions --------------
# Three practical functions are provided:
# 2.a) orig.data.boot.mean.two.sample.test -------------------
# provides a two-sided "p" value for the difference in mean for two samples
# Important: smaller sample first
# Uses the two original data sets for camparisons of two means
orig.data.boot.mean.two.sample.test <- function (A.inp, B.inp, Boot_runs = 10000) {
# generate posterior for sample A
Boot.runs = Boot_runs # number of Bootstrap runs (for A and B)
test.samA <- A.inp
n = length(test.samA) # sample size (300)
boot.samplesA = matrix(sample(test.samA, size = Boot.runs * n,
replace = TRUE),
Boot.runs, n)
# generates a matrix with 10000 lines and 3000 columns (samples)
dim(boot.samplesA) # number of rows and columns
## [1] 10000 100
#View(as.data.frame(boot.samples))
boot.statisticsA = apply(boot.samplesA, 1, mean)
# calculates the mean for each line (i.e., for each sampe)
# gives a vector with 10000 means
mean(boot.statisticsA) # overall mean of means: -0.03081875
sd (boot.statisticsA) # 0.10326
# generate posterior for sample B
test.samB <- B.inp
n = length(test.samB) # sample size (300)
boot.samplesB = matrix(sample(test.samB, size = Boot.runs * n,
replace = TRUE),
Boot.runs, n)
# generates a matrix with 10000 lines and 3000 columns (samples)
dim(boot.samplesB) # number of rows and columns
## [1] 10000 100
#View(as.data.frame(boot.samples))
boot.statisticsB = apply(boot.samplesB, 1, mean)
# calculates the mean for each line (i.e., for each sampe)
# gives a vector with 10000 means
mean(boot.statisticsA) # overall mean of means: -0.03081875
sd (boot.statisticsA) # 0.10326
mean(boot.statisticsB) # overall mean of means: 0.340888
sd (boot.statisticsB) # 0.103
# difference between posteriors ------------
boot.statisticsDiff <- boot.statisticsB - boot.statisticsA
mean(boot.statisticsDiff) # overall mean of diff: 0.37291
sd (boot.statisticsDiff) # sd of diff: 0.1547
# conf int. for diff.:
# Non-parametr. ordinary Bootstrap (no assumptions, uses quantiles):
# quantile(boot.statisticsDiff, c(0.025, 0.975)) # correct 95% quantiles OK!
# calculate "p" value from bootstrap diff. ----------
length(boot.statisticsDiff [boot.statisticsDiff< 0]) # 7 are below 0
N.below.zero <- length(boot.statisticsDiff [boot.statisticsDiff< 0]) # 7 are below 0
N.total <- length(boot.statisticsDiff ) # all
( one.sided.p_value <- N.below.zero / N.total) # one-sided "p" value
(two.sided.p_value <- 2* (N.below.zero / N.total)) # two-sided "p" value
c("two.sided.p_value", two.sided.p_value) # output
}
orig.data.boot.mean.two.sample.test(A.set, B.set , Boot_runs = 50000)
## [1] "two.sided.p_value" "0.02404"
t.test(A.set, B.set)
##
## Welch Two Sample t-test
##
## data: A.set and B.set
## t = -2.2261, df = 198, p-value = 0.02714
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -5.6576144 -0.3423856
## sample estimates:
## mean of x mean of y
## 9.209861 12.209861
# 2.b.) orig.data.boot.median.two.sample.test --------------
# Provides a two-sided "p" value for the difference in medians for two samples
# Important: smaller sample first
# Uses the two original data sets for camparisons of two medians
orig.data.boot.median.two.sample.test <- function (A.inp, B.inp, Boot_runs = 10000) {
# generate posterior for sample A
Boot.runs = Boot_runs # number of Bootstrap runs (for A and B)
test.samA <- A.inp
n = length(test.samA) # sample size (300)
boot.samplesA = matrix(sample(test.samA, size = Boot.runs * n,
replace = TRUE),
Boot.runs, n)
# generates a matrix with 10000 lines and 3000 columns (samples)
dim(boot.samplesA) # number of rows and columns
## [1] 10000 100
#View(as.data.frame(boot.samples))
boot.statisticsA = apply(boot.samplesA, 1, median)
# calculates the median for each line (i.e., for each sampe)
# gives a vector with 10000 medians
median(boot.statisticsA) # overall median of medians: -0.03081875
sd (boot.statisticsA) # 0.10326
# generate posterior for sample B
test.samB <- B.inp
n = length(test.samB) # sample size (300)
boot.samplesB = matrix(sample(test.samB, size = Boot.runs * n,
replace = TRUE),
Boot.runs, n)
# generates a matrix with 10000 lines and 3000 columns (samples)
dim(boot.samplesB) # number of rows and columns
## [1] 10000 100
#View(as.data.frame(boot.samples))
boot.statisticsB = apply(boot.samplesB, 1, median)
# calculates the median for each line (i.e., for each sampe)
# gives a vector with 10000 medians
median(boot.statisticsA) # overall median of medians: -0.03081875
sd (boot.statisticsA) # 0.10326
median(boot.statisticsB) # overall median of medians: 0.340888
sd (boot.statisticsB) # 0.103
# difference between posteriors ------------
boot.statisticsDiff <- boot.statisticsB - boot.statisticsA
median(boot.statisticsDiff) # overall median of diff: 0.37291
sd (boot.statisticsDiff) # sd of diff: 0.1547
# conf int. for diff.:
# Non-parametr. ordinary Bootstrap (no assumptions, uses quantiles):
# quantile(boot.statisticsDiff, c(0.025, 0.975)) # correct 95% quantiles OK!
# calculate "p" value from bootstrap diff. ----------
length(boot.statisticsDiff [boot.statisticsDiff< 0]) # 7 are below 0
N.below.zero <- length(boot.statisticsDiff [boot.statisticsDiff< 0]) # 7 are below 0
N.total <- length(boot.statisticsDiff ) # all
( one.sided.p_value <- N.below.zero / N.total) # one-sided "p" value
(two.sided.p_value <- 2* (N.below.zero / N.total)) # two-sided "p" value
c("two.sided.p_value", two.sided.p_value) # output
}
# "p" value of the bootstrap median test
(p.diff.median.boot <- orig.data.boot.median.two.sample.test(A.set, B.set , Boot_runs = 20000))
## [1] "two.sided.p_value" "0.0867"
# 2.c.) boot.p.two.sample.diff.posteriors -------------------
# Provides a two-sided "p" value for the difference in mean (or medians) for two samples
# Important: smaller sample first
# Uses two posterior distributions (for means or for medians)
boot.p.two.sample.diff.posteriors <- function (boot.post.A, boot.post.B) {
# difference between posteriors
boot.statisticsA <- boot.post.A # posterior for sample A
boot.statisticsB <- boot.post.B # posterior for sample B
boot.statisticsDiff <- boot.statisticsB - boot.statisticsA
# calculate "p" value from bootstrap diff.
N.below.zero <- length(boot.statisticsDiff [boot.statisticsDiff< 0]) # 7 are below 0
N.total <- length(boot.statisticsDiff ) # all
( one.sided.p_value <- N.below.zero / N.total) # one-sided "p" value
(two.sided.p_value <- 2* (N.below.zero / N.total)) # two-sided "p" value
c("two.sided.p_value", two.sided.p_value) # output
}
boot.p.two.sample.diff.posteriors(boot.statisticsA, boot.statisticsB )
## [1] "two.sided.p_value" "0.0262"