rm(list = ls())
mat <- matrix(c(4, 5, 14, 7), ncol = 2)  # 2x2 table of paired made/missed outcomes
rownames(mat) <- c("Made First","Missed First")
colnames(mat) <- c("Made Second","Missed Second")
mat
## Made Second Missed Second
## Made First 4 14
## Missed First 5 7
\(H_{0}: p_{\text{first}} = p_{\text{second}}\)
\(H_{A}: p_{\text{first}} \neq p_{\text{second}}\)
mtest <- mcnemar.test(mat)
p.mcnemar <- mtest$p.value
library(epibasix)  # mcNemar() comes from the epibasix package
mcNemar(X = mat, alpha = 0.05, force = TRUE, digits = 3)
##
## Matched Pairs Analysis: McNemar's Chi^2 Statistic and Odds Ratio
##
## McNemar's Chi^2 Statistic (corrected for continuity) = 3.368 which has a p-value of: 0.066
##
## McNemar's Odds Ratio (b/c): 2.8
## 95% Confidence Limits for the OR are: [1.051, 24.746]
Test Statistic: \(\chi^{2} = 3.3684211\)
P-value: 0.0664574
Conclusion: With a p-value of 0.0664574 \(> \alpha = 0.05\), we fail to reject the null hypothesis; the data do not provide statistically significant evidence in favor of the alternative hypothesis. Equivalently, under the null hypothesis there is a 6.645742% chance of observing a sample as or more extreme than this one.
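As a check on the software output, the continuity-corrected statistic can be reproduced by hand from the two discordant cells (the object names below are illustrative, not part of the assignment):

n12 <- mat["Made First", "Missed Second"]     # 14 pairs: made first, missed second
n21 <- mat["Missed First", "Made Second"]     # 5 pairs: missed first, made second
chi2 <- (abs(n12 - n21) - 1)^2 / (n12 + n21)  # (|14 - 5| - 1)^2 / 19 = 3.368
pchisq(chi2, df = 1, lower.tail = FALSE)      # reproduces the 0.0665 p-value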
perm <- fisher.test(mat); perm  # exact test of OR = 1; does not use the pairing structure
##
## Fisher's Exact Test for Count Data
##
## data: mat
## p-value = 0.4181
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.05986525 2.61005193
## sample estimates:
## odds ratio
## 0.4131166
p.fisher <- perm$p.value
McNemar’s test produced a p-value of 0.0664574 while Fisher’s exact test (the permutation-style comparison above) produced a p-value of 0.4181063. Both tests lead to the same conclusion at significance level \(\alpha = 0.05\) even though their p-values differ by 0.3516489. The gap is most likely because the two procedures answer different questions: McNemar’s test is built for dichotomous, paired data and compares the marginal proportions using only the 19 discordant pairs, while Fisher’s test treats the table as a cross-classification and tests for association between the first and second outcomes, ignoring the pairing.
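Because McNemar’s test conditions on the 19 discordant pairs, which split \(\text{Binomial}(19, 0.5)\) under \(H_{0}\), an exact version is also available as a two-sided binomial test; a minimal check in base R:

binom.test(x = 14, n = 19, p = 0.5)  # exact McNemar; p-value should land near the corrected chi-square result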
rm(list = ls())
x <- c(55,140,91,122,111,185,203,101,76,145,95,101,196,45,299,226,65,70,196,72,121,171,151,113,112,67,276,125,100,81,122,71,158,78,162,128,96,79,67,119)
set.seed(2634)
n <- length(x); nsim <- 10000
t.hat <- mean(x); t.boots <- rep(NA, nsim)
for(i in 1:nsim){
  boots <- x[sample(x = 1:n, size = n, replace = TRUE)]  # resample the data with replacement
  t.boots[i] <- mean(boots)
}
MSE_mean <- mean((t.boots - t.hat)^2)  # bootstrap MSE of the sample mean
SE_mean <- sd(t.boots)                 # bootstrap SE of the sample mean
set.seed(2634)
n <- length(x); nsim <- 10000
t.hat <- sd(x); t.boots <- rep(NA, nsim)
for(i in 1:nsim){
  boots <- x[sample(x = 1:n, size = n, replace = TRUE)]
  t.boots[i] <- sd(boots)
}
MSE_sd <- mean((t.boots - t.hat)^2)  # bootstrap MSE of the sample SD
SE_sd <- sd(t.boots)                 # bootstrap SE of the sample SD
set.seed(2634)
n <- length(x); nsim <- 10000
t.hat <- quantile(x, probs = 0.95); t.boots <- rep(NA, nsim)
for(i in 1:nsim){
  boots <- x[sample(x = 1:n, size = n, replace = TRUE)]
  t.boots[i] <- quantile(boots, probs = 0.95)
}
MSE_perc <- mean((t.boots - t.hat)^2)  # bootstrap MSE of the 95th percentile
SE_perc <- sd(t.boots)                 # bootstrap SE of the 95th percentile
MSE for the sample mean = 83.6459714 with SE = 9.1458081
MSE for the sample SD = 69.509916 with SE = 8.214312
MSE for the 95th percentile = 1346.088194 with SE = 36.4678481
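The three chunks above differ only in the statistic being resampled, so they could be collapsed into a single helper; the sketch below is illustrative (boot_mse is not a name used in the assignment):

boot_mse <- function(x, stat, nsim = 10000){
  t.hat <- stat(x)  # statistic on the original sample
  t.boots <- replicate(nsim, stat(x[sample(length(x), replace = TRUE)]))
  c(MSE = mean((t.boots - t.hat)^2), SE = sd(t.boots))
}
set.seed(2634)
boot_mse(x, mean)  # should reproduce the mean results above (same seed, same draws)

The 95th percentile can be passed as an anonymous function, e.g. function(z) quantile(z, probs = 0.95).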
set.seed(2634)
n <- 15
x <- rnorm(n, mean = 5, sd = 6)
mean(x)
## [1] 5.583215
true.var <- 36/n; true.var  # Var of the sample mean: sigma^2 / n = 36/15
## [1] 2.4
set.seed(2634)
t.boots <- rep(NA, times = nsim)
for(i in 1:nsim){
  boots <- x[sample(1:n, n, replace = TRUE)]
  t.boots[i] <- mean(boots)
}
boots.est <- var(t.boots); boots.est
## [1] 3.488056
library(knitr)  # kable() comes from the knitr package
kable(data.frame(TheoreticalVariance = true.var,
                 BootstrapVariance = boots.est))
| TheoreticalVariance | BootstrapVariance |
|---|---|
| 2.4 | 3.488056 |

The bootstrap variance is computed from a single sample of \(n = 15\), so it fluctuates around the theoretical \(\sigma^{2}/n = 2.4\); here it lands noticeably above it.
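As a side check (a sketch, not part of the original analysis), the bootstrap variance of the mean is essentially the plug-in value \((n-1)s^{2}/n^{2}\) for the observed sample, so the gap from 2.4 reflects this particular draw of 15 points rather than an error:

var(x) * (n - 1) / n^2  # plug-in variance of the mean; the bootstrap estimate should track this, not 36/n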
set.seed(2634)
y <- rbinom(100,1,0.75)
x <- rep(NA,100)
for(i in 1:100){
  if(y[i] == 1){
    x[i] <- rnorm(n = 1, mean = 20, sd = 5)   # with prob 0.75: N(20, 5^2)
  } else {
    x[i] <- rnorm(n = 1, mean = 10, sd = 10)  # with prob 0.25: N(10, 10^2)
  }
}
t.hat <- mean(x); t.hat
## [1] 16.88462
# Var(X) = E[X^2] - (E[X])^2; each component contributes sd^2 + mean^2
x.var <- ((0.75 * (5^2 + 20^2)) + (0.25 * (10^2 + 10^2))) - (((0.75*20) + (0.25*10))^2)
true.var <- x.var/100; true.var
## [1] 0.625
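For reference, the second-moment derivation behind x.var:

\[
E[X^{2}] = 0.75\,(5^{2} + 20^{2}) + 0.25\,(10^{2} + 10^{2}) = 368.75, \qquad
E[X] = 0.75(20) + 0.25(10) = 17.5
\]
\[
\mathrm{Var}(X) = E[X^{2}] - (E[X])^{2} = 368.75 - 306.25 = 62.5, \qquad
\mathrm{Var}(\bar{X}) = \frac{62.5}{100} = 0.625
\]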
set.seed(2634)
t.boots <- rep(NA, times = nsim)
for (i in 1:nsim){
  boots <- x[sample(1:100, 100, replace = TRUE)]
  t.boots[i] <- mean(boots)
}
boots.var <- var(t.boots)
kable(data.frame(TheoreticalVariance = true.var,
BootstrapVariance = boots.var))
| TheoreticalVariance | BootstrapVariance |
|---|---|
| 0.625 | 0.645868 |

With \(n = 100\) observations, the bootstrap estimate of 0.645868 sits close to the theoretical variance of 0.625.