# See the discussions linked below about the inaccuracy of Fisher's exact test and Yates' correction:
# https://stats.stackexchange.com/questions/14226/given-the-power-of-computers-these-days-is-there-ever-a-reason-to-do-a-chi-squa
# https://stats.stackexchange.com/questions/100976/n-1-pearsons-chi-square-in-r
# enter cell counts as matrix - here it is 2x2 but this can be edited
# Build the contingency table one row at a time; add or widen rows to
# analyse a larger table.
data.cells <- rbind(
  c( 5, 15),
  c(15,  5)
)
data.cells
## [,1] [,2]
## [1,] 5 15
## [2,] 15 5
# standard (Karl) Pearson chi-squared test, without Yates' continuity correction
chisq.test(x = data.cells, correct = FALSE, simulate.p.value = FALSE)
##
## Pearson's Chi-squared test
##
## data: data.cells
## X-squared = 10, df = 1, p-value = 0.001565
# compare with simulated p value (which tends to be conservative)
chisq.test(x = data.cells, simulate.p.value = TRUE, B = 999999)
##
## Pearson's Chi-squared test with simulated p-value (based on
## 999999 replicates)
##
## data: data.cells
## X-squared = 10, df = NA, p-value = 0.00377
# E. Pearson's N-1 correction of the chi-squared test
# accurate when all expected values > 1
#
# Runs the uncorrected Pearson chi-squared test on `data.matrix`, rescales
# the statistic by (N - 1) / N, where N is the total count, and computes the
# upper-tail chi-squared p-value for the rescaled statistic.
#
# Arguments:
#   data.matrix  matrix of cell counts (e.g. a 2x2 contingency table)
#
# Returns a named list with four elements:
#   'Egon Pearson Chi-Square'  the N-1 corrected statistic
#   'df'                       prod(dim(data.matrix) - 1)
#   'p'                        upper-tail p-value of the corrected statistic
#   'Smallest expected value (should be greater than 1)'
#                              minimum expected cell count under independence
epcs.test <- function(data.matrix){
  # Test the table that was passed in. Referring to the global `data.cells`
  # here would silently analyse a different table whenever the argument is
  # not that exact global object.
  # chisq.test() warnings are suppressed; the smallest expected count is
  # returned instead so the caller can judge the approximation directly.
  uncorrected.test <- suppressWarnings(
    chisq.test(data.matrix, simulate.p.value = FALSE, correct = FALSE)
  )
  N <- sum(data.matrix)
  # Full component names: `$stat` / `$par` only work via partial matching.
  corrected.stat <- uncorrected.test$statistic[[1]] * (N - 1) / N
  pval <- pchisq(corrected.stat, uncorrected.test$parameter[[1]],
                 lower.tail = FALSE)
  output.list <- list(
    'Egon Pearson Chi-Square' = corrected.stat,
    'df' = prod(dim(data.matrix) - 1),
    'p' = pval,
    'Smallest expected value (should be greater than 1)' =
      min(uncorrected.test$expected)
  )
  return(output.list)
}
epcs.test(data.matrix = data.cells)
## $`Egon Pearson Chi-Square`
## [1] 9.75
##
## $df
## [1] 1
##
## $p
## [1] 0.001793227
##
## $`Smallest expected value (should be greater than 1)`
## [1] 10
# check with small expected values
# Build the small-count table one column at a time.
data.cells <- cbind(
  c(0, 7),
  c(6, 1)
)
data.cells
## [,1] [,2]
## [1,] 0 6
## [2,] 7 1
epcs.test(data.matrix = data.cells)
## $`Egon Pearson Chi-Square`
## [1] 9.75
##
## $df
## [1] 1
##
## $p
## [1] 0.001793227
##
## $`Smallest expected value (should be greater than 1)`
## [1] 3