Chi square test with counts

Đôi khi trong những tình huống phân tích dữ liệu chúng ta cần kiểm định sự khác biệt số liệu trong các bảng bằng Chi square test.

Chẳng hạn trong bảng sau, chúng ta cần biết khác biệt về giới tính giữa các nhóm có ý nghĩa thống kê hay không.

M <- as.table(rbind(c(732, 425, 317), c(484, 251, 318)))
Xsq <- chisq.test(M)  # Prints test summary
Xsq
Xsq$p.value
Xsq$observed   # observed counts (same as M)
Xsq$expected   # expected counts under the null
Xsq$residuals  # Pearson residuals
Xsq$stdres     # standardized residuals

p = 2.319e-06 < 0.001

Một số công thức khác

## Effect of simulating p-values using Monte Carlo simulation
x <- matrix(c(12, 5, 7, 7), ncol = 2)
chisq.test(x)$p.value           # 0.4233
chisq.test(x, simulate.p.value = TRUE, B = 10000)$p.value # around 0.29!
x <- c(A = 20, B = 15, C = 25)
chisq.test(x)
chisq.test(as.table(x))      
x <- c(89,37,30,28,2)
p <- c(40,20,20,15,5)
try(
  chisq.test(x, p = p)                # gives an error
)
chisq.test(x, p = p, rescale.p = TRUE)
p <- c(0.40, 0.20, 0.20, 0.19, 0.01)

# Expected count in category 5
# is 1.86 < 5 ==> chi square approx.
chisq.test(x, p = p)            #               maybe doubtful, but is ok!
chisq.test(x, p = p, simulate.p.value = TRUE)

prop.test

Khác biệt tỉ lệ giữa các nhóm có ý nghĩa không

Khác biệt số đếm so với tỉ lệ p = 0.87 của dân số

prop.test(x=25,n=48, p=0.87, alternative='less')

prop.test(x=25, n=48, p = 0.87,
          alternative = c("two.sided", "less", "greater"),
          conf.level = 0.95, correct = TRUE)

Fisher Exact test

# Fisher's exact test
x <- matrix(c(22, 42, 16, 1, 98, 98, 98, 98), byrow = TRUE, nrow = 2, ncol = 4);

fisher.test(x)

Khoảng tin cậy 95% của 2 tỉ lệ

library(pgirmess)
x<-c(2,10,7,8,7) # eg: number of positive cases
y<-c(56,22,7,20,5)# eg: number of negative cases

CI(cbind(x,y), conf.level=0.95)

Value

A 3 column matrix.

Cách khác:

x <- matrix(c(10, 15, 27, 31, 10, 18), ncol = 2)
CI(x, conf.level= 0.95)

Tính thủ công

# 95CI for proportion differnce
p1 = 0
p2 = 0.012
n1 = 98
n2 = 150
z = 1.96
(p1 - p2)  -  z * sqrt((p1*(1 - p1)/n1 + p2*(1 - p2)/n2))    # LOW 95CI
(p1 - p2)  +  z * sqrt((p1*(1 - p1)/n1 + p2*(1 - p2)/n2))    # HIGH 95CI

confidence intervals for a binomial proportion

library(DescTools)
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
binom.test(7, 21,
           0.5,
           alternative="two.sided",
           conf.level=0.95)

    Exact binomial test

data:  7 and 21
number of successes = 7, number of trials = 21, p-value =
0.1892
alternative hypothesis: true probability of success is not equal to 0.5
95 percent confidence interval:
 0.1458769 0.5696755
sample estimates:
probability of success 
             0.3333333 
observed = c(7, 14, 25, 37)
total = sum(observed)

BinomCI(observed, total,
        conf.level = 0.95,
        method = "clopper-pearson")
           est     lwr.ci    upr.ci
x.1 0.08433735 0.03458229 0.1660766
x.2 0.16867470 0.09539963 0.2667839
x.3 0.30120482 0.20531236 0.4117773
x.4 0.44578313 0.33655876 0.5589902

confidence intervals for a binomial proportion


BinomCI(7, 21,
        conf.level = 0.95,
        method = "clopper-pearson")
           est    lwr.ci    upr.ci
[1,] 0.3333333 0.1458769 0.5696755

CI for proportion difference

library(PropCIs)
# p1 = 7/12, p2 = 13/17
diffscoreci(7, 21, 13, 17,
            conf.level=0.95)



data:  

95 percent confidence interval:
 -0.6685985 -0.1103804

confidence intervals for a multinomial proportion

Sex Count Female 10
Male 9 Other 1 No answer 1 ———- —– Total 21

library(DescTools)

observed = c(10, 9, 1, 1)

MultinomCI(observed,
           conf.level=0.95,
           method="sisonglaz")
            est    lwr.ci    upr.ci
[1,] 0.47619048 0.2857143 0.7009460
[2,] 0.42857143 0.2380952 0.6533270
[3,] 0.04761905 0.0000000 0.2723746
[4,] 0.04761905 0.0000000 0.2723746

CIs for numeric variables

library(epiDisplay)
data(Oswego)
.data <- Oswego
attach(.data)
# logical variable
ci(ill)
# numeric variable
ci(age)
# factor
ci(sex=="M")
ci(sex=="F")
detach(.data)

Thank you for reading

LS0tDQp0aXRsZTogIlNPTUUgU1RBVElTVElDQUwgQ0FMQ1VMQVRPUlMiDQphdXRob3I6ICJUaGlldSBOZ3V5ZW4iDQpkYXRlOiAiOS8zMC8yMDIxIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQogIA0KZWRpdG9yX29wdGlvbnM6DQogIGNodW5rX291dHB1dF90eXBlOiBpbmxpbmUNCi0tLQ0KDQpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0NCmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkNCmBgYA0KDQojIyBDaGkgc3F1YXJlIHRlc3Qgd2l0aCBjb3VudHMNCg0KxJDDtGkga2hpIHRyb25nIG5o4buvbmcgdMOsbmggaHXhu5FuZyBwaMOibiB0w61jaCBk4buvIGxp4buHdSBjaMO6bmcgdGEgY+G6p24ga2nhu4NtIMSR4buLbmggc+G7sSBraMOhYyBiaeG7h3Qgc+G7kSBsaeG7h3UgdHJvbmcgY8OhYyBi4bqjbmcgYuG6sW5nIENoaSBzcXVhcmUgdGVzdC4NCg0KQ2jhurNuZyBo4bqhbiB0cm9uZyBi4bqjbmcgc2F1LCBjaMO6bmcgdGEgY+G6p24gYmnhur90IGtow6FjIGJp4buHdCB24buBIGdp4bubaSB0w61uaCBnaeG7r2EgY8OhYyBuaMOzbSBjw7Mgw70gbmdoxKlhIHRo4buRbmcga8OqIGhheSBraMO0bmcuDQoNCiFbXShpbWFnZXMvdGFiMS5qcGcgIlRhYmxlIDEiKQ0KDQpgYGB7ciB9DQpNIDwtIGFzLnRhYmxlKHJiaW5kKGMoNzMyLCA0MjUsIDMxNyksIGMoNDg0LCAyNTEsIDMxOCkpKQ0KWHNxIDwtIGNoaXNxLnRlc3QoTSkgICMgUHJpbnRzIHRlc3Qgc3VtbWFyeQ0KWHNxDQpYc3EkcC52YWx1ZQ0KWHNxJG9ic2VydmVkICAgIyBvYnNlcnZlZCBjb3VudHMgKHNhbWUgYXMgTSkNClhzcSRleHBlY3RlZCAgICMgZXhwZWN0ZWQgY291bnRzIHVuZGVyIHRoZSBudWxsDQpYc3EkcmVzaWR1YWxzICAjIFBlYXJzb24gcmVzaWR1YWxzDQpYc3Ekc3RkcmVzICAgICAjIHN0YW5kYXJkaXplZCByZXNpZHVhbHMNCg0KYGBgDQoNCnAgPSAyLjMxOWUtMDYgXDwgMC4wMDENCg0KTeG7mXQgc+G7kSBjw7RuZyB0aOG7qWMga2jDoWMNCg0KYGBge3J9DQojIyBFZmZlY3Qgb2Ygc2ltdWxhdGluZyBwLXZhbHVlcyB1c2luZyBNb250ZSBDYXJsbyBzaW11bGF0aW9uDQp4IDwtIG1hdHJpeChjKDEyLCA1LCA3LCA3KSwgbmNvbCA9IDIpDQpjaGlzcS50ZXN0KHgpJHAudmFsdWUgICAgICAgICAgICMgMC40MjMzDQpjaGlzcS50ZXN0KHgsIHNpbXVsYXRlLnAudmFsdWUgPSBUUlVFLCBCID0gMTAwMDApJHAudmFsdWUgIyBhcm91bmQgMC4yOSENCg0KYGBgDQoNCmBgYHtyfQ0KeCA8LSBjKEEgPSAyMCwgQiA9IDE1LCBDID0gMjUpDQpjaGlzcS50ZXN0KHgpDQpjaGlzcS50ZXN0KGFzLnRhYmxlKHgpKSAgICAgIA0KDQpgYGANCg0KYGBge3J9DQp4IDwtIGMoODksMzcsMzAsMjgsMikNCnAgPC0gYyg0MCwyMCwyMCwxNSw1KQ0KdHJ5KA0KICBjaGlzcS50ZXN0KHgsIHAgPSBwKSAgICAgICAgICAgICAgICAjIGdpdmVzIGFuIGVycm9yDQopDQpjaGlzcS50ZXN0KHgsIHAgPSBwLCByZXNjYWxlLnAgPSBUUlVFKQ0KDQoNCmBgYA0KDQpgYGB7cn0NCnAgPC0gYygwLjQwLCAwLjIwLCAwLjIwLCAwLjE5LCAwLjAxKQ0KDQojIEV4cGVjdGVkIGNvdW50IGluIGNhdGVnb3J5IDUNCiMgaXMgMS44NiA8IDUgPT0+IGNoaSBzcXVhcmUgYXBwcm94Lg0KY2hpc3EudGVzdCh4LCBwID0gcCkgICAgICAgICAgICAjICAgICAgICAgICAgICAgbWF5YmUgZG91YnRmdWwsIGJ1dCBpcyBvayENCmNoaXNxLnRlc3QoeCwgcCA9IHAsIHNpbXVsYXRlLnAudmFsdWUgPSBUUlVFKQ0KDQoNCmBgYA0KDQpgYGB7fQ0KYGBgDQoNCiMjIHByb3AudGVzdA0KDQpLaMOhYyBiaeG7h3QgdOG7iSBs4buHIGdp4buvYSBjw6FjIG5ow7NtIGPDsyDDvSBuZ2jEqWEga2jDtG5nDQoNCmBgYHtyIHByZXNzdXJlLCBlY2hvPUZBTFNFfQ0KIyBwcm9wLnRlc3QNCnNtb2tlcnMgIDwtIGMoIDgzLCA5MCwgMTI5LCA3MCApDQpwYXRpZW50cyA8LSBjKCA4NiwgOTMsIDEzNiwgODIgKQ0KDQpwcm9wLnRlc3Qoc21va2VycywgcGF0aWVudHMpDQoNCmBgYA0KDQpLaMOhYyBiaeG7h3Qgc+G7kSDEkeG6v20gc28gduG7m2kgdOG7iSBs4buHIHAgPSAwLjg3IGPhu6dhIGTDom4gc+G7kQ0KDQpgYGB7cn0NCnByb3AudGVzdCh4PTI1LG49NDgsIHA9MC44NywgYWx0ZXJuYXRpdmU9J2xlc3MnKQ0KDQpwcm9wLnRlc3QoeD0yNSwgbj00OCwgcCA9IDAuODcsDQogICAgICAgICAgYWx0ZXJuYXRpdmUgPSBjKCJ0d28uc2lkZWQiLCAibGVzcyIsICJncmVhdGVyIiksDQogICAgICAgICAgY29uZi5sZXZlbCA9IDAuOTUsIGNvcnJlY3QgPSBUUlVFKQ0KYGBgDQoNCiMjIEZpc2hlciBFeGFjdCB0ZXN0DQoNCmBgYHtyfQ0KIyBGaXNoZXIncyBleGFjdCB0ZXN0DQp4IDwtIG1hdHJpeChjKDIyLCA0MiwgMTYsIDEsIDk4LCA5OCwgOTgsIDk4KSwgYnlyb3cgPSBUUlVFLCBucm93ID0gMiwgbmNvbCA9IDQpOw0KDQpmaXNoZXIudGVzdCh4KQ0KDQpgYGANCg0KIyMgS2hv4bqjbmcgdGluIGPhuq15IDk1JSBj4bunYSAyIHThu4kgbOG7hw0KDQpgYGB7cn0NCmxpYnJhcnkocGdpcm1lc3MpDQp4PC1jKDIsMTAsNyw4LDcpICMgZWc6IG51bWJlciBvZiBwb3NpdGl2ZSBjYXNlcw0KeTwtYyg1NiwyMiw3LDIwLDUpIyBlZzogbnVtYmVyIG9mIG5lZ2F0aXZlIGNhc2VzDQoNCkNJKGNiaW5kKHgseSksIGNvbmYubGV2ZWw9MC45NSkNCmBgYA0KDQoqKlZhbHVlKioNCg0KQSAzIGNvbHVtbiBtYXRyaXguDQoNCi0gICBDb2x1bW4gMTogcGVyY2VudGFnZSBlc3RpbWF0ZQ0KDQotICAgQ29sdW1uIDI6IGxvd2VyIGxpbWl0IG9mIHRoZSBjb25maWRlbmNlIGludGVydmFsDQoNCi0gICBjb2x1bW4gMzogdXBwZXIgbGltaXQgb2YgdGhlIGNvbmZpZGVuY2UgaW50ZXJ2YWwNCg0KQ8OhY2gga2jDoWM6DQoNCmBgYHtyfQ0KeCA8LSBtYXRyaXgoYygxMCwgMTUsIDI3LCAzMSwgMTAsIDE4KSwgbmNvbCA9IDIpDQpDSSh4LCBjb25mLmxldmVsPSAwLjk1KQ0KYGBgDQoNClTDrW5oIHRo4bunIGPDtG5nDQoNCmBgYHtyfQ0KIyA5NUNJIGZvciBwcm9wb3J0aW9uIGRpZmZlcm5jZQ0KcDEgPSAwDQpwMiA9IDAuMDEyDQpuMSA9IDk4DQpuMiA9IDE1MA0KeiA9IDEuOTYNCihwMSAtIHAyKSAgLSAgeiAqIHNxcnQoKHAxKigxIC0gcDEpL24xICsgcDIqKDEgLSBwMikvbjIpKSAgICAjIExPVyA5NUNJDQoocDEgLSBwMikgICsgIHogKiBzcXJ0KChwMSooMSAtIHAxKS9uMSArIHAyKigxIC0gcDIpL24yKSkgICAgIyBISUdIIDk1Q0kNCg0KYGBgDQoNCiMjIGNvbmZpZGVuY2UgaW50ZXJ2YWxzIGZvciBhIGJpbm9taWFsIHByb3BvcnRpb24NCg0KYGBge3J9DQpsaWJyYXJ5KERlc2NUb29scykNCmJpbm9tLnRlc3QoNywgMjEsDQogICAgICAgICAgIDAuNSwNCiAgICAgICAgICAgYWx0ZXJuYXRpdmU9InR3by5zaWRlZCIsDQogICAgICAgICAgIGNvbmYubGV2ZWw9MC45NSkNCg0KYGBgDQoNCmBgYHtyfQ0Kb2JzZXJ2ZWQgPSBjKDcsIDE0LCAyNSwgMzcpDQp0b3RhbCA9IHN1bShvYnNlcnZlZCkNCg0KQmlub21DSShvYnNlcnZlZCwgdG90YWwsDQogICAgICAgIGNvbmYubGV2ZWwgPSAwLjk1LA0KICAgICAgICBtZXRob2QgPSAiY2xvcHBlci1wZWFyc29uIikNCmBgYA0KDQpjb25maWRlbmNlIGludGVydmFscyBmb3IgYSBiaW5vbWlhbCBwcm9wb3J0aW9uDQoNCmBgYHtyfQ0KDQpCaW5vbUNJKDcsIDIxLA0KICAgICAgICBjb25mLmxldmVsID0gMC45NSwNCiAgICAgICAgbWV0aG9kID0gImNsb3BwZXItcGVhcnNvbiIpDQpgYGANCkNJIGZvciBwcm9wb3J0aW9uIGRpZmZlcmVuY2UNCg0KYGBge3J9DQpsaWJyYXJ5KFByb3BDSXMpDQojIHAxID0gNy8xMiwgcDIgPSAxMy8xNw0KZGlmZnNjb3JlY2koNywgMjEsIDEzLCAxNywNCiAgICAgICAgICAgIGNvbmYubGV2ZWw9MC45NSkNCmBgYA0KIyMgY29uZmlkZW5jZSBpbnRlcnZhbHMgZm9yIGEgbXVsdGlub21pYWwgcHJvcG9ydGlvbg0KU2V4ICAgICAgICAgIENvdW50DQpGZW1hbGUgICAgICAgMTAgICAgDQpNYWxlICAgICAgICAgIDkNCk90aGVyICAgICAgICAgMQ0KTm8gYW5zd2VyICAgICAxDQotLS0tLS0tLS0tICAgLS0tLS0NClRvdGFsICAgICAgICAyMQ0KDQoNCmBgYHtyfQ0KbGlicmFyeShEZXNjVG9vbHMpDQoNCm9ic2VydmVkID0gYygxMCwgOSwgMSwgMSkNCg0KTXVsdGlub21DSShvYnNlcnZlZCwNCiAgICAgICAgICAgY29uZi5sZXZlbD0wLjk1LA0KICAgICAgICAgICBtZXRob2Q9InNpc29uZ2xheiIpDQpgYGANCg0KDQoNCiMjIENJcyBmb3IgbnVtZXJpYyB2YXJpYWJsZXMNCg0KYGBge3J9DQpsaWJyYXJ5KGVwaURpc3BsYXkpDQpkYXRhKE9zd2VnbykNCi5kYXRhIDwtIE9zd2Vnbw0KYXR0YWNoKC5kYXRhKQ0KIyBsb2dpY2FsIHZhcmlhYmxlDQpjaShpbGwpDQojIG51bWVyaWMgdmFyaWFibGUNCmNpKGFnZSkNCiMgZmFjdG9yDQpjaShzZXg9PSJNIikNCmNpKHNleD09IkYiKQ0KZGV0YWNoKC5kYXRhKQ0KYGBgDQoNClRoYW5rIHlvdSBmb3IgcmVhZGluZw0K