##Exercise 5.1.

risk <- 289 / 350
sim_events <- rbinom(n = 5000, size = 350, prob = risk)
quantile(sim_events, c(0.025, 0.975))
##  2.5% 97.5% 
##   275   303

##Exercise 5.2.

quantile(sim_events, c(0.025, 0.975)) / 350
##      2.5%     97.5% 
## 0.7857143 0.8657143

##Exercise 5.3.

R1 <- 273 / 350
R0 <- 289 / 350
RR <- R1 / R0
SE <- sqrt(1/273 - 1/350 + 1/289 - 1/350)
Lower_CI <- RR - 1.96 * SE
Upper_CI <- RR + 1.96 * SE

data <- c(273, 77, 289, 61)
new_drug <- matrix(data, nrow = 2)
stats <- fisher.test(x = new_drug)

list(R1, R0, RR, Lower_CI, Upper_CI, stats)
## [[1]]
## [1] 0.78
## 
## [[2]]
## [1] 0.8257143
## 
## [[3]]
## [1] 0.9446367
## 
## [[4]]
## [1] 0.8710668
## 
## [[5]]
## [1] 1.018207
## 
## [[6]]
## 
##  Fisher's Exact Test for Count Data
## 
## data:  new_drug
## p-value = 0.154
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.504886 1.106634
## sample estimates:
## odds ratio 
##  0.7486641

##Exercise 5.4.

drugrx.df <- read.csv("https://raw.githubusercontent.com/taragonmd/data/master/drugrx-pearl2.csv")
str(drugrx.df)
## 'data.frame':    700 obs. of  4 variables:
##  $ X        : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Recovered: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Drug     : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Gender   : Factor w/ 2 levels "Men","Women": 1 1 1 1 1 1 1 1 1 1 ...
head(drugrx.df)
##   X Recovered Drug Gender
## 1 1       Yes  Yes    Men
## 2 2       Yes  Yes    Men
## 3 3       Yes  Yes    Men
## 4 4       Yes  Yes    Men
## 5 5       Yes  Yes    Men
## 6 6       Yes  Yes    Men
xtabs(~drugrx.df$Recovered + drugrx.df$Drug)
##                    drugrx.df$Drug
## drugrx.df$Recovered  No Yes
##                 No   61  77
##                 Yes 289 273
RR_total <- (273 / (77+273)) / (289 / (289+61)); RR_total
## [1] 0.9446367
xtabs(~drugrx.df$Recovered + drugrx.df$Drug + drugrx.df$Gender)
## , , drugrx.df$Gender = Men
## 
##                    drugrx.df$Drug
## drugrx.df$Recovered  No Yes
##                 No   36   6
##                 Yes 234  81
## 
## , , drugrx.df$Gender = Women
## 
##                    drugrx.df$Drug
## drugrx.df$Recovered  No Yes
##                 No   25  71
##                 Yes  55 192
RR_men <- (81/(81+6)) / (234/(36+234)); RR_men
## [1] 1.074271
RR_women <- (192/(71+192)) / (55/(55+25)); RR_women
## [1] 1.061873

The drug treatment is not effective in a whole, but it’s a little effective if stratified by gender. There’s not much difference of effectiveness between men and women (the effectiveness in men is a little bit better than in women).

##Exercise 5.5.

oswego2 <- read.csv("https://raw.githubusercontent.com/taragonmd/data/master/oswego/oswego2.txt", strip.white = TRUE)
food.item <- c("Baked.ham","Spinach","Mashed.potato","Cabbage.salad","Jello","Rolls","Brown.bread","Milk","Coffee","Water","Cakes","Vanilla.ice.cream","Chocolate.ice.cream","Fruit.salad")
names(oswego2)[9:22] <- food.item
epitable <- function(exposure, outcome, digits = 3, ...){
  x <- xtabs(~ exposure + outcome, na.action = na.pass)
  rowtot <- apply(x, 1, sum)
  rowdist <- sweep(x, 1, rowtot, '/')
  risk0 <- rowdist['N','Y']
  risk1 <- rowdist['Y','Y']
  rr <- risk1 / risk0
  or <- (risk1 / (1-risk1)) / (risk0 / (1-risk0))
  ft <- fisher.test(x, ...)
  pv <- ft$p.value
  results <- signif(c(risk0, risk1, rr, or, pv), digits = digits)
  names(results) <- c('R_0', 'R_1', 'RR', 'OR', 'p.value')
  list(data = x, results = results)
}

data <- rbind(epitable(oswego2$Baked.ham, oswego2$Ill)$results, epitable(oswego2$Spinach, oswego2$Ill)$results, epitable(oswego2$Mashed.potato, oswego2$Ill)$results,
epitable(oswego2$Cabbage.salad, oswego2$Ill)$results,
epitable(oswego2$Jello, oswego2$Ill)$results, 
epitable(oswego2$Rolls, oswego2$Ill)$results,
epitable(oswego2$Brown.bread, oswego2$Ill)$results,
epitable(oswego2$Milk, oswego2$Ill)$results,
epitable(oswego2$Coffee, oswego2$Ill)$results,
epitable(oswego2$Water, oswego2$Ill)$results,
epitable(oswego2$Cakes, oswego2$Ill)$results,
epitable(oswego2$Vanilla.ice.cream, oswego2$Ill)$results,
epitable(oswego2$Chocolate.ice.cream, oswego2$Ill)$results,
epitable(oswego2$Fruit.salad, oswego2$Ill)$results)
food.data <- cbind(food.item, data)
as.data.frame(food.data)
##              food.item   R_0   R_1    RR    OR p.value
## 1            Baked.ham 0.586  0.63  1.08   1.2   0.809
## 2              Spinach 0.625 0.605 0.967 0.918       1
## 3        Mashed.potato 0.622 0.622     1     1       1
## 4        Cabbage.salad 0.596 0.643  1.08  1.22   0.808
## 5                Jello 0.577 0.696  1.21  1.68   0.442
## 6                Rolls 0.658 0.568 0.863 0.682   0.482
## 7          Brown.bread 0.583 0.667  1.14  1.43   0.622
## 8                 Milk  0.62   0.5 0.807 0.614   0.638
## 9               Coffee 0.614 0.613 0.999 0.997       1
## 10               Water 0.647 0.542 0.837 0.645    0.45
## 11               Cakes 0.543 0.675  1.24  1.75   0.342
## 12   Vanilla.ice.cream 0.143 0.796  5.57  23.5 2.6e-07
## 13 Chocolate.ice.cream 0.741 0.532 0.718 0.398  0.0891
## 14         Fruit.salad 0.609 0.667   1.1  1.29       1

Vanilla ice cream should be the source of illness (has a significantly high RR and OR).

##Exercise 5.6.

outbreak <- matrix(NA, length(food.item), 5)
rownames(outbreak) <- food.item
colnames(outbreak) <- c('R_0','R_1','RR','OR','P value')
for(i in 1:length(food.item)){
  outbreak[i,] <- epitable(oswego2[, food.item[i]], oswego2$Ill)$results
}
outbreak
##                       R_0   R_1    RR     OR  P value
## Baked.ham           0.586 0.630 1.080  1.200 8.09e-01
## Spinach             0.625 0.605 0.967  0.918 1.00e+00
## Mashed.potato       0.622 0.622 1.000  1.000 1.00e+00
## Cabbage.salad       0.596 0.643 1.080  1.220 8.08e-01
## Jello               0.577 0.696 1.210  1.680 4.42e-01
## Rolls               0.658 0.568 0.863  0.682 4.82e-01
## Brown.bread         0.583 0.667 1.140  1.430 6.22e-01
## Milk                0.620 0.500 0.807  0.614 6.38e-01
## Coffee              0.614 0.613 0.999  0.997 1.00e+00
## Water               0.647 0.542 0.837  0.645 4.50e-01
## Cakes               0.543 0.675 1.240  1.750 3.42e-01
## Vanilla.ice.cream   0.143 0.796 5.570 23.500 2.60e-07
## Chocolate.ice.cream 0.741 0.532 0.718  0.398 8.91e-02
## Fruit.salad         0.609 0.667 1.100  1.290 1.00e+00

##Exercise 5.7.

myepitab <- function(x){
  epitable(x, oswego2$Ill)$results
}
lapply(oswego2[,food.item], myepitab) #each element of oswego2[,food.item], which is basically the data for each food.item, is applied to the function myepitab, and shown as a list. 
## $Baked.ham
##     R_0     R_1      RR      OR p.value 
##   0.586   0.630   1.080   1.200   0.809 
## 
## $Spinach
##     R_0     R_1      RR      OR p.value 
##   0.625   0.605   0.967   0.918   1.000 
## 
## $Mashed.potato
##     R_0     R_1      RR      OR p.value 
##   0.622   0.622   1.000   1.000   1.000 
## 
## $Cabbage.salad
##     R_0     R_1      RR      OR p.value 
##   0.596   0.643   1.080   1.220   0.808 
## 
## $Jello
##     R_0     R_1      RR      OR p.value 
##   0.577   0.696   1.210   1.680   0.442 
## 
## $Rolls
##     R_0     R_1      RR      OR p.value 
##   0.658   0.568   0.863   0.682   0.482 
## 
## $Brown.bread
##     R_0     R_1      RR      OR p.value 
##   0.583   0.667   1.140   1.430   0.622 
## 
## $Milk
##     R_0     R_1      RR      OR p.value 
##   0.620   0.500   0.807   0.614   0.638 
## 
## $Coffee
##     R_0     R_1      RR      OR p.value 
##   0.614   0.613   0.999   0.997   1.000 
## 
## $Water
##     R_0     R_1      RR      OR p.value 
##   0.647   0.542   0.837   0.645   0.450 
## 
## $Cakes
##     R_0     R_1      RR      OR p.value 
##   0.543   0.675   1.240   1.750   0.342 
## 
## $Vanilla.ice.cream
##      R_0      R_1       RR       OR  p.value 
## 1.43e-01 7.96e-01 5.57e+00 2.35e+01 2.60e-07 
## 
## $Chocolate.ice.cream
##     R_0     R_1      RR      OR p.value 
##  0.7410  0.5320  0.7180  0.3980  0.0891 
## 
## $Fruit.salad
##     R_0     R_1      RR      OR p.value 
##   0.609   0.667   1.100   1.290   1.000
t(sapply(oswego2[,food.item], myepitab)) #wraps the results of lapply into a matrix. 
##                       R_0   R_1    RR     OR  p.value
## Baked.ham           0.586 0.630 1.080  1.200 8.09e-01
## Spinach             0.625 0.605 0.967  0.918 1.00e+00
## Mashed.potato       0.622 0.622 1.000  1.000 1.00e+00
## Cabbage.salad       0.596 0.643 1.080  1.220 8.08e-01
## Jello               0.577 0.696 1.210  1.680 4.42e-01
## Rolls               0.658 0.568 0.863  0.682 4.82e-01
## Brown.bread         0.583 0.667 1.140  1.430 6.22e-01
## Milk                0.620 0.500 0.807  0.614 6.38e-01
## Coffee              0.614 0.613 0.999  0.997 1.00e+00
## Water               0.647 0.542 0.837  0.645 4.50e-01
## Cakes               0.543 0.675 1.240  1.750 3.42e-01
## Vanilla.ice.cream   0.143 0.796 5.570 23.500 2.60e-07
## Chocolate.ice.cream 0.741 0.532 0.718  0.398 8.91e-02
## Fruit.salad         0.609 0.667 1.100  1.290 1.00e+00

lapply returns a list of the same length as X, while sapply wraps the lapply into a matrix.

Exercise 5.8.

a <- 1
b <- 2
f <- function(x){
  a * x + b
}
g <- function(x){
  a <- 2
  b <- 1
  f(x)
}
g(2)
## [1] 4

The result equals 4 because the function g(2) would first go to f(2), and since a and b are free variables in function f, R needs to go to global envrionment to find the numbers assigned to a and b, which are a = 1 and b = 2.

##Exercise 5.9.

g <- function(x){
  a <- 2
  b <- 1
  f <- function(x){
    a * x + b
  }
  f(x)
}
g(2)
## [1] 5

The result equals to 5 because in the function f(), a and b are free variables. R would then look in the parent envrionment in which a = 2 and b = 1.