# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 241 Module 5 Day 2 ~ 2 x 2 Tables ~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Low Birth Weight Data:
# The data set consists of data collected on 189 women to identify the risk factors
# associated with the birth of a low birth weight baby. The data set was collected
# at the Baystate medical Center in Springfield, Massachusetts.
# Variables:
# (1) Low Birth Weight (LBW)
# - LBW = 1 when birth weight<2500 grams
# - LBW = 0 when birth weight>=2500 grams
# (2) Smoke
# - Smoke = 1 if mother smoked during pregnancy
# - Smole = 0 if mother did NOT smoke during pregnancy
# Homogeneity of Proportions Test:
# - Enter conditional probablities as input
prop.test(x=c(30,29),n=c(74,115),correct=FALSE)
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: c(30, 29) out of c(74, 115)
## X-squared = 4.9237, df = 1, p-value = 0.02649
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.01607177 0.29039121
## sample estimates:
## prop 1 prop 2
## 0.4054054 0.2521739
# Chi-square Test of Independence:
# - Enter 2x2 table as input
table <- rbind(c(30,44),c(29,86))
table
## [,1] [,2]
## [1,] 30 44
## [2,] 29 86
# - Check assumptions
chisq.test(x=table,correct=FALSE)$expected
## [,1] [,2]
## [1,] 23.10053 50.89947
## [2,] 35.89947 79.10053
# - Run the test
chisq.test(x=table,correct=FALSE)
##
## Pearson's Chi-squared test
##
## data: table
## X-squared = 4.9237, df = 1, p-value = 0.02649
# To estimate the odds and risk ratios, need to use functions
# in the 'epitools' package
# - Note: If you have never installed this package, uncomment the
# ?install.packages() command and run it.
#install.packages('epitools')
library(epitools)
# Estimate Odds Ratio:
# - Option 1: The "correct" approach
oddsratio(table,rev='both',method='wald')
## $data
## Outcome
## Predictor Disease2 Disease1 Total
## Exposed2 86 29 115
## Exposed1 44 30 74
## Total 130 59 189
##
## $measure
## odds ratio with 95% C.I.
## Predictor estimate lower upper
## Exposed2 1.000000 NA NA
## Exposed1 2.021944 1.08066 3.783112
##
## $p.value
## two-sided
## Predictor midp.exact fisher.exact chi.square
## Exposed2 NA NA NA
## Exposed1 0.02914865 0.0361765 0.02649064
##
## $correction
## [1] FALSE
##
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# - Option 2: Removing the rev='both' input, which is equivalent
oddsratio(table,method='wald')
## $data
## Outcome
## Predictor Disease1 Disease2 Total
## Exposed1 30 44 74
## Exposed2 29 86 115
## Total 59 130 189
##
## $measure
## odds ratio with 95% C.I.
## Predictor estimate lower upper
## Exposed1 1.000000 NA NA
## Exposed2 2.021944 1.08066 3.783112
##
## $p.value
## two-sided
## Predictor midp.exact fisher.exact chi.square
## Exposed1 NA NA NA
## Exposed2 0.02914865 0.0361765 0.02649064
##
## $correction
## [1] FALSE
##
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# Estimate Risk Ratio:
riskratio(table,rev='both',method='wald')
## $data
## Outcome
## Predictor Disease2 Disease1 Total
## Exposed2 86 29 115
## Exposed1 44 30 74
## Total 130 59 189
##
## $measure
## risk ratio with 95% C.I.
## Predictor estimate lower upper
## Exposed2 1.000000 NA NA
## Exposed1 1.607642 1.057812 2.443262
##
## $p.value
## two-sided
## Predictor midp.exact fisher.exact chi.square
## Exposed2 NA NA NA
## Exposed1 0.02914865 0.0361765 0.02649064
##
## $correction
## [1] FALSE
##
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# - Careful! Removing the rev='both' input is NOT equivalent for RRs!
riskratio(table,method='wald')
## $data
## Outcome
## Predictor Disease1 Disease2 Total
## Exposed1 30 44 74
## Exposed2 29 86 115
## Total 59 130 189
##
## $measure
## risk ratio with 95% C.I.
## Predictor estimate lower upper
## Exposed1 1.000000 NA NA
## Exposed2 1.257708 1.013374 1.560953
##
## $p.value
## two-sided
## Predictor midp.exact fisher.exact chi.square
## Exposed1 NA NA NA
## Exposed2 0.02914865 0.0361765 0.02649064
##
## $correction
## [1] FALSE
##
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# Kidney Stones Data:
# The data set consists of data collected on 1052 sequentially admitted
# patients who were treated for kidney stones.
# Data Dictionary:
# 1. Treatment (ESWL vs. Open Surgery vs. PNL)
# 2. Treat.Outcome (1 = Successful vs. Not = 2)
# Download and load the kidney stone dataset used in lecture:
download.file("http://www.duke.edu/~sgrambow/crp241data/kidney_stones.RData",
destfile = "kidney_stones.RData",quiet=TRUE,mode="wb",cacheOK=FALSE)
load("kidney_stones.RData")
# Compute counts for two way (3 x 2) frequency table
table(kidney_stones$Treatment,kidney_stones$Treat.Outcome)
##
## 1 2
## ESWL 316 36
## Open Surgery 273 77
## PNL 289 61
# Chi-square Test of Independence:
# - Check assumptions
chisq.test(table(kidney_stones$Treatment,kidney_stones$Treat.Outcome),correct=FALSE)$expected
##
## 1 2
## ESWL 293.7795 58.22053
## Open Surgery 292.1103 57.88973
## PNL 292.1103 57.88973
# - Run the test
chisq.test(table(kidney_stones$Treatment,kidney_stones$Treat.Outcome),correct=FALSE)
##
## Pearson's Chi-squared test
##
## data: table(kidney_stones$Treatment, kidney_stones$Treat.Outcome)
## X-squared = 17.92, df = 2, p-value = 0.0001284
# Break it down into three 2 x 2 tables to explain global chi-square p-value
# - Use the rbind() and c() functions to be build the subsets
# (1) OS and ESWL Groups
# --- OS vs. ESWL
oddsratio(rbind(c(316,36),c(273,77)),rev='columns',method='wald')
## $data
## Outcome
## Predictor Disease2 Disease1 Total
## Exposed1 36 316 352
## Exposed2 77 273 350
## Total 113 589 702
##
## $measure
## odds ratio with 95% C.I.
## Predictor estimate lower upper
## Exposed1 1.0000000 NA NA
## Exposed2 0.4039125 0.2633856 0.6194164
##
## $p.value
## two-sided
## Predictor midp.exact fisher.exact chi.square
## Exposed1 NA NA NA
## Exposed2 2.051104e-05 2.250976e-05 2.197737e-05
##
## $correction
## [1] FALSE
##
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# --- ESWL vs. OS
oddsratio(rbind(c(316,36),c(273,77)),rev='both',method='wald')
## $data
## Outcome
## Predictor Disease2 Disease1 Total
## Exposed2 77 273 350
## Exposed1 36 316 352
## Total 113 589 702
##
## $measure
## odds ratio with 95% C.I.
## Predictor estimate lower upper
## Exposed2 1.000000 NA NA
## Exposed1 2.475783 1.614423 3.796715
##
## $p.value
## two-sided
## Predictor midp.exact fisher.exact chi.square
## Exposed2 NA NA NA
## Exposed1 2.051104e-05 2.250976e-05 2.197737e-05
##
## $correction
## [1] FALSE
##
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# (2) PNL and ESWL Groups
# --- PNL vs. ESWL
oddsratio(rbind(c(316,36),c(289,61)),rev='columns',method='wald')
## $data
## Outcome
## Predictor Disease2 Disease1 Total
## Exposed1 36 316 352
## Exposed2 61 289 350
## Total 97 605 702
##
## $measure
## odds ratio with 95% C.I.
## Predictor estimate lower upper
## Exposed1 1.0000000 NA NA
## Exposed2 0.5397385 0.3470083 0.839512
##
## $p.value
## two-sided
## Predictor midp.exact fisher.exact chi.square
## Exposed1 NA NA NA
## Exposed2 0.005791516 0.006198166 0.005700501
##
## $correction
## [1] FALSE
##
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# --- ESWL vs. PNL
oddsratio(rbind(c(316,36),c(289,61)),rev='both',method='wald')
## $data
## Outcome
## Predictor Disease2 Disease1 Total
## Exposed2 61 289 350
## Exposed1 36 316 352
## Total 97 605 702
##
## $measure
## odds ratio with 95% C.I.
## Predictor estimate lower upper
## Exposed2 1.000000 NA NA
## Exposed1 1.852749 1.191168 2.881775
##
## $p.value
## two-sided
## Predictor midp.exact fisher.exact chi.square
## Exposed2 NA NA NA
## Exposed1 0.005791516 0.006198166 0.005700501
##
## $correction
## [1] FALSE
##
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# (3) OS and PNL Groups
oddsratio(rbind(c(273,77),c(289,61)),rev='columns',method='wald')
## $data
## Outcome
## Predictor Disease2 Disease1 Total
## Exposed1 77 273 350
## Exposed2 61 289 350
## Total 138 562 700
##
## $measure
## odds ratio with 95% C.I.
## Predictor estimate lower upper
## Exposed1 1.000000 NA NA
## Exposed2 1.336276 0.9188954 1.943238
##
## $p.value
## two-sided
## Predictor midp.exact fisher.exact chi.square
## Exposed1 NA NA NA
## Exposed2 0.1300246 0.1539742 0.1284954
##
## $correction
## [1] FALSE
##
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
oddsratio(rbind(c(273,77),c(289,61)),rev='both',method='wald')
## $data
## Outcome
## Predictor Disease2 Disease1 Total
## Exposed2 61 289 350
## Exposed1 77 273 350
## Total 138 562 700
##
## $measure
## odds ratio with 95% C.I.
## Predictor estimate lower upper
## Exposed2 1.0000000 NA NA
## Exposed1 0.7483485 0.5146049 1.088263
##
## $p.value
## two-sided
## Predictor midp.exact fisher.exact chi.square
## Exposed2 NA NA NA
## Exposed1 0.1300246 0.1539742 0.1284954
##
## $correction
## [1] FALSE
##
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# - Note: oddsratio() function does NOT give global p-value!
oddsratio(table(kidney_stones),rev='columns',method='wald')
## $data
## Treat.Outcome
## Treatment 2 1 Total
## ESWL 36 316 352
## Open Surgery 77 273 350
## PNL 61 289 350
## Total 174 878 1052
##
## $measure
## odds ratio with 95% C.I.
## Treatment estimate lower upper
## ESWL 1.0000000 NA NA
## Open Surgery 0.4039125 0.2633856 0.6194164
## PNL 0.5397385 0.3470083 0.8395120
##
## $p.value
## two-sided
## Treatment midp.exact fisher.exact chi.square
## ESWL NA NA NA
## Open Surgery 2.051104e-05 2.250976e-05 2.197737e-05
## PNL 5.791516e-03 6.198166e-03 5.700501e-03
##
## $correction
## [1] FALSE
##
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# End of Porgram