# we need the epitools package loaded 
# to calculate odds and risk ratios
#install.packages('epitools')
library(epitools)

# water-pipe smoking table
# Lets create the table and call it table
# we can using the rbind and c functions to do this
table <- rbind(c(58,101),c(88,88))
table
##      [,1] [,2]
## [1,]   58  101
## [2,]   88   88
# we can easily calculate descriptive proportions
# for females and males
# proportion of females who have tried a water-pipe
58/(58+88)
## [1] 0.3972603
# this yields 39.7% 
# proportion of males who have tried a water-pipe
101/(101+88)
## [1] 0.5343915
# this yields 53.4% 
# the risk difference is (females - males)
58/(58+88) - 101/(101+88)
## [1] -0.1371313
# this yields -13.7%

# we can test the significance of this risk difference
# using a homogeneity of proportions test
# for females, numerator=58, denominator=58+88=146
# for males, numerator=101, denominator=101+88=189
# we add correct=FALSE to prevent continuity correction
prop.test(x=c(58,101),n=c(146,189),correct=FALSE)
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  c(58, 101) out of c(146, 189)
## X-squared = 6.2119, df = 1, p-value = 0.01269
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.24370221 -0.03056031
## sample estimates:
##    prop 1    prop 2 
## 0.3972603 0.5343915
# this yields a p-value = 0.01269
# with a 95% confidence interval from -24.4% to -3.1%

# we could also analyze this using the chi-square
# test of independence.
# the input is the table object we created above.
# we can use $expected to check whether normality assumptions
# are met
chisq.test(x=table,correct=FALSE)$expected 
##          [,1]     [,2]
## [1,] 69.29552 89.70448
## [2,] 76.70448 99.29552
# This yields expected cell counts that are all greater than 5

# - Run the test 
chisq.test(x=table,correct=FALSE)
## 
##  Pearson's Chi-squared test
## 
## data:  table
## X-squared = 6.2119, df = 1, p-value = 0.01269
# this yields a p-value =0.01269 which is the same as 
# we obtained above with the proportions test (as it should be)
# we can calculate a risk or odds ratio to accompany this test 
# as an effect estimate.  Usually you would use either the RR or OR
# but not both -- if you can calculate an RR that is usually 
# preferable which is what we will do here -- showing both for 
# the purpose of illustration

# Estimate Risk Ratio: 
# Let's express risk in terms 
# of females vs males
# so non-smoking males are the reference cell
# since riskratio expects the reference cell
# to be in the upper left cell of the table
# we need to use rev=both here
riskratio(table,rev='both',method='wald')
## $data
##           Outcome
## Predictor  Disease2 Disease1 Total
##   Exposed2       88       88   176
##   Exposed1      101       58   159
##   Total         189      146   335
## 
## $measure
##           risk ratio with 95% C.I.
## Predictor   estimate     lower     upper
##   Exposed2 1.0000000        NA        NA
##   Exposed1 0.7295597 0.5666036 0.9393824
## 
## $p.value
##           two-sided
## Predictor  midp.exact fisher.exact chi.square
##   Exposed2         NA           NA         NA
##   Exposed1 0.01306395    0.0151849 0.01268963
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# this yields RR=0.73, 95% CI=0.57,0.94
# with p-value = 0.01269 (as above with our other tests)

# if we wanted an odds ratio we would do:
# Estimate Odds Ratio: 
oddsratio(table,rev='both',method='wald')
## $data
##           Outcome
## Predictor  Disease2 Disease1 Total
##   Exposed2       88       88   176
##   Exposed1      101       58   159
##   Total         189      146   335
## 
## $measure
##           odds ratio with 95% C.I.
## Predictor   estimate     lower     upper
##   Exposed2 1.0000000        NA        NA
##   Exposed1 0.5742574 0.3706982 0.8895959
## 
## $p.value
##           two-sided
## Predictor  midp.exact fisher.exact chi.square
##   Exposed2         NA           NA         NA
##   Exposed1 0.01306395    0.0151849 0.01268963
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
# this yields OR=0.57, 95% CI=0.37,0.89
# with p-value = 0.01269 (as above with our other tests)
# - Here we could remove the rev='both' input,
# since for 2 x2 table the answer will be eqivalent for OR