# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 241 Module 5 Day 2 ~ 2 x 2 Tables ~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Low Birth Weight Data:
# The data set consists of data collected on 189 women to identify the risk factors 
# associated with the birth of a low birth weight baby. The data set was collected 
# at the Baystate medical Center in Springfield, Massachusetts. 

# Variables:
# (1) Low Birth Weight (LBW)
#     - LBW = 1 when birth weight<2500 grams
#     - LBW = 0 when birth weight>=2500 grams
# (2) Smoke
#     - Smoke = 1 if mother smoked during pregnancy
#     - Smole = 0 if mother did NOT smoke during pregnancy

# Homogeneity of Proportions Test: 
# - Enter conditional probablities as input 
prop.test(x=c(30,29),n=c(74,115),correct=FALSE)
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  c(30, 29) out of c(74, 115)
## X-squared = 4.9237, df = 1, p-value = 0.02649
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.01607177 0.29039121
## sample estimates:
##    prop 1    prop 2 
## 0.4054054 0.2521739
# Chi-square Test of Independence: 
# - Enter 2x2 table as input
table <- rbind(c(30,44),c(29,86))
table
##      [,1] [,2]
## [1,]   30   44
## [2,]   29   86
# - Check assumptions 
chisq.test(x=table,correct=FALSE)$expected 
##          [,1]     [,2]
## [1,] 23.10053 50.89947
## [2,] 35.89947 79.10053
# - Run the test 
chisq.test(x=table,correct=FALSE)
## 
##  Pearson's Chi-squared test
## 
## data:  table
## X-squared = 4.9237, df = 1, p-value = 0.02649
# To estimate the odds and risk ratios, need to use functions
# in the 'epitools' package
# - Note: If you have never installed this package, uncomment the
#         ?install.packages() command and run it.
#install.packages('epitools')
library(epitools)

# Estimate Odds Ratio: 
# - Option 1: The "correct" approach 
oddsratio(table,rev='both',method='wald')
## $data
##           Outcome
## Predictor  Disease2 Disease1 Total
##   Exposed2       86       29   115
##   Exposed1       44       30    74
##   Total         130       59   189
## 
## $measure
##           odds ratio with 95% C.I.
## Predictor  estimate   lower    upper
##   Exposed2 1.000000      NA       NA
##   Exposed1 2.021944 1.08066 3.783112
## 
## $p.value
##           two-sided
## Predictor  midp.exact fisher.exact chi.square
##   Exposed2         NA           NA         NA
##   Exposed1 0.02914865    0.0361765 0.02649064
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# - Option 2: Removing the rev='both' input, which is equivalent
oddsratio(table,method='wald')
## $data
##           Outcome
## Predictor  Disease1 Disease2 Total
##   Exposed1       30       44    74
##   Exposed2       29       86   115
##   Total          59      130   189
## 
## $measure
##           odds ratio with 95% C.I.
## Predictor  estimate   lower    upper
##   Exposed1 1.000000      NA       NA
##   Exposed2 2.021944 1.08066 3.783112
## 
## $p.value
##           two-sided
## Predictor  midp.exact fisher.exact chi.square
##   Exposed1         NA           NA         NA
##   Exposed2 0.02914865    0.0361765 0.02649064
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# Estimate Risk Ratio: 
riskratio(table,rev='both',method='wald')
## $data
##           Outcome
## Predictor  Disease2 Disease1 Total
##   Exposed2       86       29   115
##   Exposed1       44       30    74
##   Total         130       59   189
## 
## $measure
##           risk ratio with 95% C.I.
## Predictor  estimate    lower    upper
##   Exposed2 1.000000       NA       NA
##   Exposed1 1.607642 1.057812 2.443262
## 
## $p.value
##           two-sided
## Predictor  midp.exact fisher.exact chi.square
##   Exposed2         NA           NA         NA
##   Exposed1 0.02914865    0.0361765 0.02649064
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# - Careful! Removing the rev='both' input is NOT equivalent for RRs!
riskratio(table,method='wald')
## $data
##           Outcome
## Predictor  Disease1 Disease2 Total
##   Exposed1       30       44    74
##   Exposed2       29       86   115
##   Total          59      130   189
## 
## $measure
##           risk ratio with 95% C.I.
## Predictor  estimate    lower    upper
##   Exposed1 1.000000       NA       NA
##   Exposed2 1.257708 1.013374 1.560953
## 
## $p.value
##           two-sided
## Predictor  midp.exact fisher.exact chi.square
##   Exposed1         NA           NA         NA
##   Exposed2 0.02914865    0.0361765 0.02649064
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# Kidney Stones Data:
# The data set consists of data collected on 1052 sequentially admitted
# patients who were treated for kidney stones. 

# Data Dictionary: 
# 1.  Treatment          (ESWL vs. Open Surgery vs. PNL)
# 2.  Treat.Outcome      (1 = Successful vs. Not = 2)

# Download and load the kidney stone dataset used in lecture: 
download.file("http://www.duke.edu/~sgrambow/crp241data/kidney_stones.RData",
              destfile = "kidney_stones.RData",quiet=TRUE,mode="wb",cacheOK=FALSE)
load("kidney_stones.RData")

# Compute counts for two way (3 x 2) frequency table
table(kidney_stones$Treatment,kidney_stones$Treat.Outcome)
##               
##                  1   2
##   ESWL         316  36
##   Open Surgery 273  77
##   PNL          289  61
# Chi-square Test of Independence: 
# - Check assumptions 
chisq.test(table(kidney_stones$Treatment,kidney_stones$Treat.Outcome),correct=FALSE)$expected 
##               
##                       1        2
##   ESWL         293.7795 58.22053
##   Open Surgery 292.1103 57.88973
##   PNL          292.1103 57.88973
# - Run the test 
chisq.test(table(kidney_stones$Treatment,kidney_stones$Treat.Outcome),correct=FALSE)
## 
##  Pearson's Chi-squared test
## 
## data:  table(kidney_stones$Treatment, kidney_stones$Treat.Outcome)
## X-squared = 17.92, df = 2, p-value = 0.0001284
# Break it down into three 2 x 2 tables to explain global chi-square p-value
# - Use the rbind() and c() functions to be build the subsets

# (1) OS and ESWL Groups
# --- OS vs. ESWL
oddsratio(rbind(c(316,36),c(273,77)),rev='columns',method='wald')
## $data
##           Outcome
## Predictor  Disease2 Disease1 Total
##   Exposed1       36      316   352
##   Exposed2       77      273   350
##   Total         113      589   702
## 
## $measure
##           odds ratio with 95% C.I.
## Predictor   estimate     lower     upper
##   Exposed1 1.0000000        NA        NA
##   Exposed2 0.4039125 0.2633856 0.6194164
## 
## $p.value
##           two-sided
## Predictor    midp.exact fisher.exact   chi.square
##   Exposed1           NA           NA           NA
##   Exposed2 2.051104e-05 2.250976e-05 2.197737e-05
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# --- ESWL vs. OS
oddsratio(rbind(c(316,36),c(273,77)),rev='both',method='wald')
## $data
##           Outcome
## Predictor  Disease2 Disease1 Total
##   Exposed2       77      273   350
##   Exposed1       36      316   352
##   Total         113      589   702
## 
## $measure
##           odds ratio with 95% C.I.
## Predictor  estimate    lower    upper
##   Exposed2 1.000000       NA       NA
##   Exposed1 2.475783 1.614423 3.796715
## 
## $p.value
##           two-sided
## Predictor    midp.exact fisher.exact   chi.square
##   Exposed2           NA           NA           NA
##   Exposed1 2.051104e-05 2.250976e-05 2.197737e-05
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# (2) PNL and ESWL Groups
# --- PNL vs. ESWL
oddsratio(rbind(c(316,36),c(289,61)),rev='columns',method='wald')
## $data
##           Outcome
## Predictor  Disease2 Disease1 Total
##   Exposed1       36      316   352
##   Exposed2       61      289   350
##   Total          97      605   702
## 
## $measure
##           odds ratio with 95% C.I.
## Predictor   estimate     lower    upper
##   Exposed1 1.0000000        NA       NA
##   Exposed2 0.5397385 0.3470083 0.839512
## 
## $p.value
##           two-sided
## Predictor   midp.exact fisher.exact  chi.square
##   Exposed1          NA           NA          NA
##   Exposed2 0.005791516  0.006198166 0.005700501
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# --- ESWL vs. PNL
oddsratio(rbind(c(316,36),c(289,61)),rev='both',method='wald')
## $data
##           Outcome
## Predictor  Disease2 Disease1 Total
##   Exposed2       61      289   350
##   Exposed1       36      316   352
##   Total          97      605   702
## 
## $measure
##           odds ratio with 95% C.I.
## Predictor  estimate    lower    upper
##   Exposed2 1.000000       NA       NA
##   Exposed1 1.852749 1.191168 2.881775
## 
## $p.value
##           two-sided
## Predictor   midp.exact fisher.exact  chi.square
##   Exposed2          NA           NA          NA
##   Exposed1 0.005791516  0.006198166 0.005700501
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# (3) OS and PNL Groups 
oddsratio(rbind(c(273,77),c(289,61)),rev='columns',method='wald')
## $data
##           Outcome
## Predictor  Disease2 Disease1 Total
##   Exposed1       77      273   350
##   Exposed2       61      289   350
##   Total         138      562   700
## 
## $measure
##           odds ratio with 95% C.I.
## Predictor  estimate     lower    upper
##   Exposed1 1.000000        NA       NA
##   Exposed2 1.336276 0.9188954 1.943238
## 
## $p.value
##           two-sided
## Predictor  midp.exact fisher.exact chi.square
##   Exposed1         NA           NA         NA
##   Exposed2  0.1300246    0.1539742  0.1284954
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
oddsratio(rbind(c(273,77),c(289,61)),rev='both',method='wald')
## $data
##           Outcome
## Predictor  Disease2 Disease1 Total
##   Exposed2       61      289   350
##   Exposed1       77      273   350
##   Total         138      562   700
## 
## $measure
##           odds ratio with 95% C.I.
## Predictor   estimate     lower    upper
##   Exposed2 1.0000000        NA       NA
##   Exposed1 0.7483485 0.5146049 1.088263
## 
## $p.value
##           two-sided
## Predictor  midp.exact fisher.exact chi.square
##   Exposed2         NA           NA         NA
##   Exposed1  0.1300246    0.1539742  0.1284954
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# - Note: oddsratio() function does NOT give global p-value!
oddsratio(table(kidney_stones),rev='columns',method='wald')
## $data
##               Treat.Outcome
## Treatment        2   1 Total
##   ESWL          36 316   352
##   Open Surgery  77 273   350
##   PNL           61 289   350
##   Total        174 878  1052
## 
## $measure
##               odds ratio with 95% C.I.
## Treatment       estimate     lower     upper
##   ESWL         1.0000000        NA        NA
##   Open Surgery 0.4039125 0.2633856 0.6194164
##   PNL          0.5397385 0.3470083 0.8395120
## 
## $p.value
##               two-sided
## Treatment        midp.exact fisher.exact   chi.square
##   ESWL                   NA           NA           NA
##   Open Surgery 2.051104e-05 2.250976e-05 2.197737e-05
##   PNL          5.791516e-03 6.198166e-03 5.700501e-03
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# End of Porgram