library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
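The experiment: generate 1000 random 2x2 matrices (entries between 1000 and 1099) using the `create_matrix()` function and, for each matrix, compute the p-values from the Chi-square test and from Fisher’s exact test. Store the results (including the original matrices) in the data frame `experiment`, together with `diff`, the Chi-square p-value minus the Fisher p-value.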
# Draw a random 2x2 matrix of whole-number counts.
#
# Each of the four cells is `offset` plus a uniform random draw from
# [0, width) truncated towards zero, so with the defaults every cell lies
# in 1000..1099. Exactly four uniform random numbers are consumed per call.
#
# Arguments:
#   offset  Value added to every cell (default 1000).
#   width   Width of the uniform range before truncation (default 100).
#
# Returns a 2x2 numeric (double) matrix, filled column by column.
create_matrix <- function(offset = 1000, width = 100) {
  stopifnot(
    is.numeric(offset), length(offset) == 1L,
    is.numeric(width), length(width) == 1L
  )
  cells <- trunc(runif(4L) * width) + offset
  matrix(cells, ncol = 2)
}
# Fix the seed so the simulated matrices are reproducible.
set.seed(123)

# Simulate 1000 random 2x2 matrices and compare the two tests on each one.
#
# Resulting columns (in this order):
#   chisq   p-value of chisq.test() on the matrix
#   fisher  p-value of fisher.test() on the matrix
#   M       list-column holding the matrix itself
#   diff    chisq - fisher
#
# tibble() replaces the deprecated data_frame(), and the per-row work is done
# with vapply() instead of the superseded rowwise() %>% do() idiom.
# With their default arguments neither test draws random numbers for a 2x2
# table, so generating all matrices up front consumes the random number
# stream in the same order as generating them one row at a time.
experiment <-
  tibble(M = replicate(1000L, create_matrix(), simplify = FALSE)) %>%
  mutate(
    chisq = vapply(M, function(m) chisq.test(m)$p.value, numeric(1)),
    fisher = vapply(M, function(m) fisher.test(m)$p.value, numeric(1)),
    diff = chisq - fisher
  ) %>%
  # Restore the original column order: p-values first, then the matrix.
  select(chisq, fisher, M, diff)
Summary of the two p-values and their difference across all simulated matrices (the matrix column `M` is dropped):
# Summarise every column except the list-column of matrices, which
# summary() cannot describe usefully.
summary(select(experiment, -M))
## chisq fisher diff
## Min. :0.01233 Min. :0.01157 Min. :-5.261e-06
## 1st Qu.:0.27956 1st Qu.:0.27137 1st Qu.: 2.545e-03
## Median :0.54981 Median :0.53878 Median : 7.315e-03
## Mean :0.53502 Mean :0.52683 Mean : 8.191e-03
## 3rd Qu.:0.79176 3rd Qu.:0.78134 3rd Qu.: 1.256e-02
## Max. :1.00000 Max. :1.00000 Max. : 2.428e-02
Histogram of the differences (`diff`, the Chi-square p-value minus the Fisher p-value):
# Distribution of the p-value differences (chisq - fisher), using
# geom_histogram()'s default binning.
ggplot(experiment, aes(x = diff)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
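Sorting by `diff` and taking the first row gives the matrix with the smallest difference. That difference is negative, so for (at least) this matrix the Chi-square test returns a lower p-value than Fisher’s exact test: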
# Sort ascending by diff, keep the first row (smallest chisq - fisher), and
# extract its M entry; the result is a one-element list holding the matrix.
head(arrange(experiment, diff), 1)[["M"]]
## [[1]]
## [,1] [,2]
## [1,] 1025 1041
## [2,] 1048 1032