# NEJM Peanut Allergy Example
# Verifying Primary Outcome Analysis
# paper reference
# 'doi.org/10.1056/NEJMoa1414850'

# we need the epitools package loaded 
# to calculate odds and risk ratios
# install.packages('epitools')
library(epitools)

# peanut allergy table
# Lets create the table and call it table
# we can using the rbind and c functions to do this
table <- rbind(c(36,227),c(5,262))
table
##      [,1] [,2]
## [1,]   36  227
## [2,]    5  262
# we can easily calculate descriptive proportions
# for the groups
# proportion of avoidance who were allergic
36/(36+227)
## [1] 0.1368821
# this yields 13.7% 
# proportion of consumption who were allergic
5/(5+262)
## [1] 0.01872659
# this yields 1.9%
# the risk difference is (avoidance-consumption)
36/(36+227) - 5/(5+262)
## [1] 0.1181555
# this yields 11.8%

# we can test the significance of this risk difference
# using a homogeneity of proportions test
# for avoidance, numerator=36, denominator=36+227=263
# for consumption, numerator=5, denominator=5+262=267
# we add correct=FALSE to prevent continuity correction
prop.test(x=c(36,5),n=c(263,267),conf.level = 0.95, correct=FALSE)
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  c(36, 5) out of c(263, 267)
## X-squared = 25.915, df = 1, p-value = 3.567e-07
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.07354555 0.16276553
## sample estimates:
##     prop 1     prop 2 
## 0.13688213 0.01872659
# this yields a p-value < 0.0001
# with a 95% confidence interval from 7.4% to 16.3%

# Estimate Risk Ratio: 
# Let's express risk in terms 
# of consumption vs avoidance to match the 
# relative prevalence (or risk) reduction 
# interpretation provided by the authors.
# Thus non-allergic avoidance would be the appropriate
# reference cell.  Since riskratio expects the reference cell
# to be in the upper left cell of the table
# we need to simply switch the columns and not 
# the columns so we need rev=columns here
riskratio(table,rev='columns',method='wald')
## $data
##           Outcome
## Predictor  Disease2 Disease1 Total
##   Exposed1      227       36   263
##   Exposed2      262        5   267
##   Total         489       41   530
## 
## $measure
##           risk ratio with 95% C.I.
## Predictor   estimate      lower     upper
##   Exposed1 1.0000000         NA        NA
##   Exposed2 0.1368082 0.05453236 0.3432177
## 
## $p.value
##           two-sided
## Predictor    midp.exact fisher.exact   chi.square
##   Exposed1           NA           NA           NA
##   Exposed2 1.294368e-07 1.389354e-07 3.567075e-07
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# this yields RR=0.137, 95% CI= 0.06, 0.34
# with p-value < 0.0001 (as above with our other tests)
# Thus, relative risk reduction of consumption compared
# to avoidance is (1-0.137)= 0.863 or 86.3%.


# I have included code below for 
# running exact binomial confidence intervals
# thinking that maybe they used this approach
# and failed to describe it.  I will warn you
# that running the code below is computational 
# intensive and may take hours to complete.  I haven't 
# run it myself.  I did check the exact interval
# for the positive skin test cohort which is much
# smaller and the exact interval did not match
# the interval given in the paper.
# install.packages("ExactCIdiff")
# library(ExactCIdiff)
# ci95 <- BinomCI(227, 262, 36, 5)$ExactCI