#Exercise 5.1

replicates <- 5000
num_of_events <- 289 # recovered
num_at_risk <- 350
risk <- num_of_events/num_at_risk
sim_events <- rbinom(n = replicates, size = num_at_risk, prob = risk)
conf.level <- 0.95
alpha <- 1 - conf.level
# 0.95 CI for binomial counts
quantile(sim_events, c(alpha/2, 1-alpha/2))
##  2.5% 97.5% 
##   274   303
# 0.95 CI for binomial proportions
quantile(sim_events, c(alpha/2, 1-alpha/2))/num_at_risk
##      2.5%     97.5% 
## 0.7828571 0.8657143

#Exercise 5.2

risk.boot <- function(x, n, conf.level = 0.95, replicates = 5000){
    # x = number of events (numerator)
    # n = number at risk (denominator)
    # conf.level = confidence level (default 0.95)
    # replicates = number of simulations
    # prepare inputs
    num_of_events <- x
    num_at_risk <- n
    # do calculations
    risk <- num_of_events/num_at_risk
    sim_events <- rbinom(n = replicates, size = num_at_risk, prob = risk)
    conf.level <- conf.level
    alpha <- 1 - conf.level
    ci <- quantile(sim_events, c(alpha/2, 1-alpha/2))
    # collect results
    list(x = num_of_events,
         n = num_at_risk,
         conf.int = ci,
         conf.level = conf.level)
}

# test
risk.boot(10, 20)
## $x
## [1] 10
## 
## $n
## [1] 20
## 
## $conf.int
##  2.5% 97.5% 
##     6    14 
## 
## $conf.level
## [1] 0.95

#Exercise 5.3

myRR <- function(x, conf.level = 0.95){
    ## x is 2x2 table with this layout
    ## |Outcome| Exposed   |
    ## |       | Yes | No  | Sum |
    ## |-------+-----+-----+-----|
    ## |   Yes |  a  |  b  | M_1 |
    ## |    No |  c  |  d  | M_0 |
    ## |-------+-----+-----|-----|
    ## |   Sum | N_1 | N_0 |  T  |
    ##
    ## Prepare input
    tab <- addmargins(x)
    aa <- tab[1, 1]
    bb <- tab[1, 2]
    N1 <- tab['Sum',1]
    N0 <- tab['Sum',2]
    
    ## Do calculations
    R1 <- aa/N1
    R0 <- bb/N0
    RR <- R1/R0
    Z <- qnorm((1 + conf.level)/2)
    logRR <- log(RR)
    SE_logRR <- sqrt(1/aa - 1/N1 + 1/bb - 1/N0)
    CI <- exp(logRR + c(-1, 1) * Z * SE_logRR)
    pv <- fisher.test(tab[1:2,1:2])$p.value

    ## Collect results
    list(data = tab,
         risks = prop.table(x, 2),
         risk.ratio = RR,
         conf.level = conf.level,
         conf.int = CI,
         p.value = pv)
}

## test function
tab <- matrix(c(273, 289, 77, 61), 2, 2, byrow = TRUE,
              dimnames = list (Recovered = c('Yes', 'No'),
                               Treatment = c('Yes', 'No')))
myRR(tab)
## $data
##          Treatment
## Recovered Yes  No Sum
##       Yes 273 289 562
##       No   77  61 138
##       Sum 350 350 700
## 
## $risks
##          Treatment
## Recovered  Yes        No
##       Yes 0.78 0.8257143
##       No  0.22 0.1742857
## 
## $risk.ratio
## [1] 0.9446367
## 
## $conf.level
## [1] 0.95
## 
## $conf.int
## [1] 0.8776359 1.0167524
## 
## $p.value
## [1] 0.1539742

#Exercise # 5.4

url <- "https://raw.githubusercontent.com/taragonmd/data/master/exported/jp-drugrx-p2.txt"
drugrx <- read.csv(url)
str(drugrx)
## 'data.frame':    700 obs. of  4 variables:
##  $ X        : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Recovered: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Drug     : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Gender   : Factor w/ 2 levels "Men","Women": 1 1 1 1 1 1 1 1 1 1 ...

ALL

tab_all <- xtabs(~ Recovered + Drug, data = drugrx)[2:1,2:1]
myRR(tab_all)
## $data
##          Drug
## Recovered Yes  No Sum
##       Yes 273 289 562
##       No   77  61 138
##       Sum 350 350 700
## 
## $risks
##          Drug
## Recovered       Yes        No
##       Yes 0.7800000 0.8257143
##       No  0.2200000 0.1742857
## 
## $risk.ratio
## [1] 0.9446367
## 
## $conf.level
## [1] 0.95
## 
## $conf.int
## [1] 0.8776359 1.0167524
## 
## $p.value
## [1] 0.1539742

MEN

tab_men <- xtabs(~ Recovered + Drug + Gender, data = drugrx)[2:1,2:1,'Men']
myRR(tab_men)
## $data
##          Drug
## Recovered Yes  No Sum
##       Yes  81 234 315
##       No    6  36  42
##       Sum  87 270 357
## 
## $risks
##          Drug
## Recovered        Yes         No
##       Yes 0.93103448 0.86666667
##       No  0.06896552 0.13333333
## 
## $risk.ratio
## [1] 1.074271
## 
## $conf.level
## [1] 0.95
## 
## $conf.int
## [1] 0.9977554 1.1566534
## 
## $p.value
## [1] 0.126632

WOMEN

tab_women <- xtabs(~ Recovered + Drug + Gender, data = drugrx)[2:1,2:1,'Women']
myRR(tab_women)
## $data
##          Drug
## Recovered Yes  No Sum
##       Yes 192  55 247
##       No   71  25  96
##       Sum 263  80 343
## 
## $risks
##          Drug
## Recovered      Yes       No
##       Yes 0.730038 0.687500
##       No  0.269962 0.312500
## 
## $risk.ratio
## [1] 1.061873
## 
## $conf.level
## [1] 0.95
## 
## $conf.int
## [1] 0.9003483 1.2523768
## 
## $p.value
## [1] 0.4786164

#Excercise 5.5

oswego2 <- read.csv("https://raw.githubusercontent.com/taragonmd/data/master/oswego/oswego2.txt", strip.white = TRUE)
food.item <- c("Baked.ham", "Spinach", "Mashed.potato", "Cabbage.salad", 
    "Jello", "Rolls", "Brown.bread", "Milk", "Coffee", "Water", "Cakes", 
    "Vanilla.ice.cream", "Chocolate.ice.cream", "Fruit.salad")
names(oswego2)[9:22] <- food.item
str(oswego2)
## 'data.frame':    75 obs. of  22 variables:
##  $ ID                 : int  2 3 4 6 7 8 9 10 14 16 ...
##  $ Age                : int  52 65 59 63 70 40 15 33 10 32 ...
##  $ Sex                : Factor w/ 2 levels "F","M": 1 2 1 1 2 1 1 1 2 1 ...
##  $ MealDate           : Factor w/ 1 level "04/18/1940": 1 1 1 1 1 1 1 1 1 1 ...
##  $ MealTime           : Factor w/ 6 levels "11:00:00","18:30:00",..: 5 2 2 4 4 4 6 3 4 NA ...
##  $ Ill                : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 2 2 2 2 ...
##  $ OnsetDate          : Factor w/ 2 levels "4/18/1940","4/19/1940": 2 2 2 1 1 2 2 1 2 2 ...
##  $ OnsetTime          : Factor w/ 17 levels "00:00:00","00:30:00",..: 2 2 2 15 15 4 3 16 4 7 ...
##  $ Baked.ham          : Factor w/ 2 levels "N","Y": 2 2 2 2 2 1 1 2 1 2 ...
##  $ Spinach            : Factor w/ 2 levels "N","Y": 2 2 2 2 2 1 1 2 1 2 ...
##  $ Mashed.potato      : Factor w/ 2 levels "N","Y": 2 2 1 1 2 1 1 2 1 1 ...
##  $ Cabbage.salad      : Factor w/ 2 levels "N","Y": 1 2 1 2 1 1 1 1 1 1 ...
##  $ Jello              : Factor w/ 2 levels "N","Y": 1 1 1 2 2 1 1 1 1 1 ...
##  $ Rolls              : Factor w/ 2 levels "N","Y": 2 1 1 1 2 1 1 2 1 2 ...
##  $ Brown.bread        : Factor w/ 2 levels "N","Y": 1 1 1 1 2 1 1 2 1 1 ...
##  $ Milk               : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Coffee             : Factor w/ 2 levels "N","Y": 2 2 2 1 2 1 1 1 1 2 ...
##  $ Water              : Factor w/ 2 levels "N","Y": 1 1 1 2 2 1 1 2 1 1 ...
##  $ Cakes              : Factor w/ 2 levels "N","Y": 1 1 2 1 1 1 2 1 1 2 ...
##  $ Vanilla.ice.cream  : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 1 2 2 2 ...
##  $ Chocolate.ice.cream: Factor w/ 2 levels "N","Y": 1 2 2 1 1 2 2 2 2 2 ...
##  $ Fruit.salad        : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
head(oswego2)
##   ID Age Sex   MealDate MealTime Ill OnsetDate OnsetTime Baked.ham Spinach
## 1  2  52   F 04/18/1940 20:00:00   Y 4/19/1940  00:30:00         Y       Y
## 2  3  65   M 04/18/1940 18:30:00   Y 4/19/1940  00:30:00         Y       Y
## 3  4  59   F 04/18/1940 18:30:00   Y 4/19/1940  00:30:00         Y       Y
## 4  6  63   F 04/18/1940 19:30:00   Y 4/18/1940  22:30:00         Y       Y
## 5  7  70   M 04/18/1940 19:30:00   Y 4/18/1940  22:30:00         Y       Y
## 6  8  40   F 04/18/1940 19:30:00   Y 4/19/1940  02:00:00         N       N
##   Mashed.potato Cabbage.salad Jello Rolls Brown.bread Milk Coffee Water
## 1             Y             N     N     Y           N    N      Y     N
## 2             Y             Y     N     N           N    N      Y     N
## 3             N             N     N     N           N    N      Y     N
## 4             N             Y     Y     N           N    N      N     Y
## 5             Y             N     Y     Y           Y    N      Y     Y
## 6             N             N     N     N           N    N      N     N
##   Cakes Vanilla.ice.cream Chocolate.ice.cream Fruit.salad
## 1     N                 Y                   N           N
## 2     N                 Y                   Y           N
## 3     Y                 Y                   Y           N
## 4     N                 Y                   N           N
## 5     N                 Y                   N           N
## 6     N                 Y                   Y           N

#Ex 5.5

tab1 = xtabs(~ Ill + Spinach, data = oswego2)
tab1
##    Spinach
## Ill  N  Y
##   N 12 17
##   Y 20 26
epitable <- function(exposure, outcome, digits = 3, ...){
   
    x <- xtabs(~ exposure + outcome, na.action=na.pass)
    rowtot <- apply(x, 1, sum)
    rowdist <- sweep(x, 1, rowtot, '/')
    risk0 <- rowdist['N','Y']
    risk1 <- rowdist['Y','Y']
    rr <- risk1/risk0
    or <- (risk1/(1-risk1)) / (risk0/(1-risk0))
    ft <- fisher.test(x, ...)
    pv <- ft$p.value
    results <- signif(c(risk0, risk1, rr, or, pv), digits = digits)
    names(results) <- c('R_0', 'R_1', 'RR', 'OR', 'p.value')
    list(data = x, results = results)
}
epitable(oswego2$Baked.ham, oswego2$Ill)
## $data
##         outcome
## exposure  N  Y
##        N 12 17
##        Y 17 29
## 
## $results
##     R_0     R_1      RR      OR p.value 
##   0.586   0.630   1.080   1.200   0.809
epitable(oswego2$Spinach, oswego2$Ill)
## $data
##         outcome
## exposure  N  Y
##        N 12 20
##        Y 17 26
## 
## $results
##     R_0     R_1      RR      OR p.value 
##   0.625   0.605   0.967   0.918   1.000
epitable(oswego2$Mashed.potato, oswego2$Ill)
## $data
##         outcome
## exposure  N  Y
##        N 14 23
##        Y 14 23
## 
## $results
##     R_0     R_1      RR      OR p.value 
##   0.622   0.622   1.000   1.000   1.000
epitable(oswego2$Cabbage.salad, oswego2$Ill)
## $data
##         outcome
## exposure  N  Y
##        N 19 28
##        Y 10 18
## 
## $results
##     R_0     R_1      RR      OR p.value 
##   0.596   0.643   1.080   1.220   0.808
epitable(oswego2$Jello, oswego2$Ill)
## $data
##         outcome
## exposure  N  Y
##        N 22 30
##        Y  7 16
## 
## $results
##     R_0     R_1      RR      OR p.value 
##   0.577   0.696   1.210   1.680   0.442
epitable(oswego2$Rolls, oswego2$Ill)
## $data
##         outcome
## exposure  N  Y
##        N 13 25
##        Y 16 21
## 
## $results
##     R_0     R_1      RR      OR p.value 
##   0.658   0.568   0.863   0.682   0.482
epitable(oswego2$Milk, oswego2$Ill)
## $data
##         outcome
## exposure  N  Y
##        N 27 44
##        Y  2  2
## 
## $results
##     R_0     R_1      RR      OR p.value 
##   0.620   0.500   0.807   0.614   0.638
epitable(oswego2$Coffee, oswego2$Ill)
## $data
##         outcome
## exposure  N  Y
##        N 17 27
##        Y 12 19
## 
## $results
##     R_0     R_1      RR      OR p.value 
##   0.614   0.613   0.999   0.997   1.000
epitable(oswego2$Water, oswego2$Ill)
## $data
##         outcome
## exposure  N  Y
##        N 18 33
##        Y 11 13
## 
## $results
##     R_0     R_1      RR      OR p.value 
##   0.647   0.542   0.837   0.645   0.450
epitable(oswego2$Cakes, oswego2$Ill)
## $data
##         outcome
## exposure  N  Y
##        N 16 19
##        Y 13 27
## 
## $results
##     R_0     R_1      RR      OR p.value 
##   0.543   0.675   1.240   1.750   0.342
epitable(oswego2$Vanilla.ice.cream, oswego2$Ill)
## $data
##         outcome
## exposure  N  Y
##        N 18  3
##        Y 11 43
## 
## $results
##      R_0      R_1       RR       OR  p.value 
## 1.43e-01 7.96e-01 5.57e+00 2.35e+01 2.60e-07
epitable(oswego2$Chocolate.ice.cream, oswego2$Ill)
## $data
##         outcome
## exposure  N  Y
##        N  7 20
##        Y 22 25
## 
## $results
##     R_0     R_1      RR      OR p.value 
##  0.7410  0.5320  0.7180  0.3980  0.0891
epitable(oswego2$Fruit.salad, oswego2$Ill)
## $data
##         outcome
## exposure  N  Y
##        N 27 42
##        Y  2  4
## 
## $results
##     R_0     R_1      RR      OR p.value 
##   0.609   0.667   1.100   1.290   1.000

The epitable shows that all of the food items were not statistically significant with exception of vanilla ice cream. In this case, we reject the null hypothesis that their is independence between the two categorical variables.

#Exercise 5.6

outbreak <- matrix(NA, length(food.item), 5)
rownames(outbreak) <- food.item
colnames(outbreak) <- c('R_0','R_1','RR','OR','P value')
for(i in 1:length(food.item)) {
  outbreak[i,] <- epitable(oswego2[,food.item[i]], oswego2$Ill)$results
}
outbreak
##                       R_0   R_1    RR     OR  P value
## Baked.ham           0.586 0.630 1.080  1.200 8.09e-01
## Spinach             0.625 0.605 0.967  0.918 1.00e+00
## Mashed.potato       0.622 0.622 1.000  1.000 1.00e+00
## Cabbage.salad       0.596 0.643 1.080  1.220 8.08e-01
## Jello               0.577 0.696 1.210  1.680 4.42e-01
## Rolls               0.658 0.568 0.863  0.682 4.82e-01
## Brown.bread         0.583 0.667 1.140  1.430 6.22e-01
## Milk                0.620 0.500 0.807  0.614 6.38e-01
## Coffee              0.614 0.613 0.999  0.997 1.00e+00
## Water               0.647 0.542 0.837  0.645 4.50e-01
## Cakes               0.543 0.675 1.240  1.750 3.42e-01
## Vanilla.ice.cream   0.143 0.796 5.570 23.500 2.60e-07
## Chocolate.ice.cream 0.741 0.532 0.718  0.398 8.91e-02
## Fruit.salad         0.609 0.667 1.100  1.290 1.00e+00

#Exercise 5.7

myepitab <- function(x) {
    epitable(x, oswego2$Ill)$results
}
lapply(oswego2[, food.item], myepitab)
## $Baked.ham
##     R_0     R_1      RR      OR p.value 
##   0.586   0.630   1.080   1.200   0.809 
## 
## $Spinach
##     R_0     R_1      RR      OR p.value 
##   0.625   0.605   0.967   0.918   1.000 
## 
## $Mashed.potato
##     R_0     R_1      RR      OR p.value 
##   0.622   0.622   1.000   1.000   1.000 
## 
## $Cabbage.salad
##     R_0     R_1      RR      OR p.value 
##   0.596   0.643   1.080   1.220   0.808 
## 
## $Jello
##     R_0     R_1      RR      OR p.value 
##   0.577   0.696   1.210   1.680   0.442 
## 
## $Rolls
##     R_0     R_1      RR      OR p.value 
##   0.658   0.568   0.863   0.682   0.482 
## 
## $Brown.bread
##     R_0     R_1      RR      OR p.value 
##   0.583   0.667   1.140   1.430   0.622 
## 
## $Milk
##     R_0     R_1      RR      OR p.value 
##   0.620   0.500   0.807   0.614   0.638 
## 
## $Coffee
##     R_0     R_1      RR      OR p.value 
##   0.614   0.613   0.999   0.997   1.000 
## 
## $Water
##     R_0     R_1      RR      OR p.value 
##   0.647   0.542   0.837   0.645   0.450 
## 
## $Cakes
##     R_0     R_1      RR      OR p.value 
##   0.543   0.675   1.240   1.750   0.342 
## 
## $Vanilla.ice.cream
##      R_0      R_1       RR       OR  p.value 
## 1.43e-01 7.96e-01 5.57e+00 2.35e+01 2.60e-07 
## 
## $Chocolate.ice.cream
##     R_0     R_1      RR      OR p.value 
##  0.7410  0.5320  0.7180  0.3980  0.0891 
## 
## $Fruit.salad
##     R_0     R_1      RR      OR p.value 
##   0.609   0.667   1.100   1.290   1.000
t(sapply(oswego2[, food.item], myepitab))  # transpose output
##                       R_0   R_1    RR     OR  p.value
## Baked.ham           0.586 0.630 1.080  1.200 8.09e-01
## Spinach             0.625 0.605 0.967  0.918 1.00e+00
## Mashed.potato       0.622 0.622 1.000  1.000 1.00e+00
## Cabbage.salad       0.596 0.643 1.080  1.220 8.08e-01
## Jello               0.577 0.696 1.210  1.680 4.42e-01
## Rolls               0.658 0.568 0.863  0.682 4.82e-01
## Brown.bread         0.583 0.667 1.140  1.430 6.22e-01
## Milk                0.620 0.500 0.807  0.614 6.38e-01
## Coffee              0.614 0.613 0.999  0.997 1.00e+00
## Water               0.647 0.542 0.837  0.645 4.50e-01
## Cakes               0.543 0.675 1.240  1.750 3.42e-01
## Vanilla.ice.cream   0.143 0.796 5.570 23.500 2.60e-07
## Chocolate.ice.cream 0.741 0.532 0.718  0.398 8.91e-02
## Fruit.salad         0.609 0.667 1.100  1.290 1.00e+00

#Exercise 5.8

a <- 1
b <- 2
f <- function(x) {
    a * x + b
}
g <- function(x) {
    a <- 2
    b <- 1
    f(x)
}
g(2)
## [1] 4

#Exercise 5.9

g <- function(x) {
    a <- 2
    b <- 1
    f <- function(x) {
        a * x + b
    }
    f(x)
}
g(2)
## [1] 5