E_Pearson_N-1_chi_square

# see discussion here about inaccuracy of Fisher exact test and Yates' correction
# https://stats.stackexchange.com/questions/14226/given-the-power-of-computers-these-days-is-there-ever-a-reason-to-do-a-chi-squa
# https://stats.stackexchange.com/questions/100976/n-1-pearsons-chi-square-in-r

# enter cell counts as matrix - here it is 2x2 but this can be edited
# counts are listed row by row; change the values (and nrow/ncol) for other tables
data.cells <- matrix(
    data = c( 5, 15,
             15,  5),
    nrow = 2, ncol = 2, byrow = TRUE
)
data.cells

##      [,1] [,2]
## [1,]    5   15
## [2,]   15    5

# standard (Karl) Pearson chi-square test, without Yates' continuity correction
chisq.test(x = data.cells, correct = FALSE, simulate.p.value = FALSE)

## 
##  Pearson's Chi-squared test
## 
## data:  data.cells
## X-squared = 10, df = 1, p-value = 0.001565

# compare with simulated p value (which tends to be conservative)
# Monte Carlo p-value based on 999999 replicates
chisq.test(x = data.cells, simulate.p.value = TRUE, B = 999999)

## 
##  Pearson's Chi-squared test with simulated p-value (based on
##  999999 replicates)
## 
## data:  data.cells
## X-squared = 10, df = NA, p-value = 0.00377

# E. Pearson's N-1 correction of the chi-squared test
# accurate when all expected values > 1

# Egon Pearson's "N - 1" chi-squared test for a table of counts.
#
# Runs the uncorrected Pearson chi-squared test on `data.matrix` and rescales
# its statistic by (N - 1) / N, where N is the total of all cell counts.
#
# Args:
#   data.matrix: matrix of cell counts (for example a 2x2 table).
#
# Returns:
#   A list with the corrected statistic, the degrees of freedom, the p-value
#   of the corrected statistic, and the smallest expected cell count.
epcs.test <- function(data.matrix){
    # Test the table passed as the argument, never an object from the calling
    # environment. suppressWarnings() silences chisq.test()'s warnings; the
    # smallest expected count is returned so the caller can check it instead.
    uncorrected.test <- suppressWarnings(
        chisq.test(data.matrix, simulate.p.value = FALSE, correct = FALSE)
    )
    N <- sum(data.matrix)
    # [[1]] drops the name attached to the statistic
    corrected.stat <- uncorrected.test$statistic[[1]] * (N - 1) / N
    pval <- pchisq(corrected.stat, uncorrected.test$parameter, lower.tail = FALSE)
    output.list <- list('Egon Pearson Chi-Square' = corrected.stat,
                        'df' = prod(dim(data.matrix) - 1),
                        'p' = pval,
                        'Smallest expected value (should be greater than 1)' = min(uncorrected.test$expected))
    return(output.list)
}

epcs.test(data.matrix = data.cells)

## $`Egon Pearson Chi-Square`
## [1] 9.75
## 
## $df
## [1] 1
## 
## $p
## [1] 0.001793227
## 
## $`Smallest expected value (should be greater than 1)`
## [1] 10

# check with small expected values
# counts are listed row by row, matching the printed layout
data.cells <- matrix(
    data = c(0, 6,
             7, 1),
    nrow = 2, ncol = 2, byrow = TRUE
)
data.cells

##      [,1] [,2]
## [1,]    0    6
## [2,]    7    1

epcs.test(data.matrix = data.cells)

## $`Egon Pearson Chi-Square`
## [1] 9.75
## 
## $df
## [1] 1
## 
## $p
## [1] 0.001793227
## 
## $`Smallest expected value (should be greater than 1)`
## [1] 3

E_Pearson_N-1_chi_square_test.R

thom

Sun Oct 14 17:36:25 2018