##Exercise 5.1.
risk <- 289 / 350
sim_events <- rbinom(n = 5000, size = 350, prob = risk)
quantile(sim_events, c(0.025, 0.975))
## 2.5% 97.5%
## 275 303
##Exercise 5.2.
quantile(sim_events, c(0.025, 0.975)) / 350
## 2.5% 97.5%
## 0.7857143 0.8657143
##Exercise 5.3.
R1 <- 273 / 350
R0 <- 289 / 350
RR <- R1 / R0
SE <- sqrt(1/273 - 1/350 + 1/289 - 1/350)
Lower_CI <- RR - 1.96 * SE
Upper_CI <- RR + 1.96 * SE
data <- c(273, 77, 289, 61)
new_drug <- matrix(data, nrow = 2)
stats <- fisher.test(x = new_drug)
list(R1, R0, RR, Lower_CI, Upper_CI, stats)
## [[1]]
## [1] 0.78
##
## [[2]]
## [1] 0.8257143
##
## [[3]]
## [1] 0.9446367
##
## [[4]]
## [1] 0.8710668
##
## [[5]]
## [1] 1.018207
##
## [[6]]
##
## Fisher's Exact Test for Count Data
##
## data: new_drug
## p-value = 0.154
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.504886 1.106634
## sample estimates:
## odds ratio
## 0.7486641
##Exercise 5.4.
drugrx.df <- read.csv("https://raw.githubusercontent.com/taragonmd/data/master/drugrx-pearl2.csv")
str(drugrx.df)
## 'data.frame': 700 obs. of 4 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Recovered: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Drug : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Gender : Factor w/ 2 levels "Men","Women": 1 1 1 1 1 1 1 1 1 1 ...
head(drugrx.df)
## X Recovered Drug Gender
## 1 1 Yes Yes Men
## 2 2 Yes Yes Men
## 3 3 Yes Yes Men
## 4 4 Yes Yes Men
## 5 5 Yes Yes Men
## 6 6 Yes Yes Men
xtabs(~drugrx.df$Recovered + drugrx.df$Drug)
## drugrx.df$Drug
## drugrx.df$Recovered No Yes
## No 61 77
## Yes 289 273
RR_total <- (273 / (77+273)) / (289 / (289+61)); RR_total
## [1] 0.9446367
xtabs(~drugrx.df$Recovered + drugrx.df$Drug + drugrx.df$Gender)
## , , drugrx.df$Gender = Men
##
## drugrx.df$Drug
## drugrx.df$Recovered No Yes
## No 36 6
## Yes 234 81
##
## , , drugrx.df$Gender = Women
##
## drugrx.df$Drug
## drugrx.df$Recovered No Yes
## No 25 71
## Yes 55 192
RR_men <- (81/(81+6)) / (234/(36+234)); RR_men
## [1] 1.074271
RR_women <- (192/(71+192)) / (55/(55+25)); RR_women
## [1] 1.061873
The drug treatment is not effective in a whole, but it’s a little effective if stratified by gender. There’s not much difference of effectiveness between men and women (the effectiveness in men is a little bit better than in women).
##Exercise 5.5.
oswego2 <- read.csv("https://raw.githubusercontent.com/taragonmd/data/master/oswego/oswego2.txt", strip.white = TRUE)
food.item <- c("Baked.ham","Spinach","Mashed.potato","Cabbage.salad","Jello","Rolls","Brown.bread","Milk","Coffee","Water","Cakes","Vanilla.ice.cream","Chocolate.ice.cream","Fruit.salad")
names(oswego2)[9:22] <- food.item
epitable <- function(exposure, outcome, digits = 3, ...){
x <- xtabs(~ exposure + outcome, na.action = na.pass)
rowtot <- apply(x, 1, sum)
rowdist <- sweep(x, 1, rowtot, '/')
risk0 <- rowdist['N','Y']
risk1 <- rowdist['Y','Y']
rr <- risk1 / risk0
or <- (risk1 / (1-risk1)) / (risk0 / (1-risk0))
ft <- fisher.test(x, ...)
pv <- ft$p.value
results <- signif(c(risk0, risk1, rr, or, pv), digits = digits)
names(results) <- c('R_0', 'R_1', 'RR', 'OR', 'p.value')
list(data = x, results = results)
}
data <- rbind(epitable(oswego2$Baked.ham, oswego2$Ill)$results, epitable(oswego2$Spinach, oswego2$Ill)$results, epitable(oswego2$Mashed.potato, oswego2$Ill)$results,
epitable(oswego2$Cabbage.salad, oswego2$Ill)$results,
epitable(oswego2$Jello, oswego2$Ill)$results,
epitable(oswego2$Rolls, oswego2$Ill)$results,
epitable(oswego2$Brown.bread, oswego2$Ill)$results,
epitable(oswego2$Milk, oswego2$Ill)$results,
epitable(oswego2$Coffee, oswego2$Ill)$results,
epitable(oswego2$Water, oswego2$Ill)$results,
epitable(oswego2$Cakes, oswego2$Ill)$results,
epitable(oswego2$Vanilla.ice.cream, oswego2$Ill)$results,
epitable(oswego2$Chocolate.ice.cream, oswego2$Ill)$results,
epitable(oswego2$Fruit.salad, oswego2$Ill)$results)
food.data <- cbind(food.item, data)
as.data.frame(food.data)
## food.item R_0 R_1 RR OR p.value
## 1 Baked.ham 0.586 0.63 1.08 1.2 0.809
## 2 Spinach 0.625 0.605 0.967 0.918 1
## 3 Mashed.potato 0.622 0.622 1 1 1
## 4 Cabbage.salad 0.596 0.643 1.08 1.22 0.808
## 5 Jello 0.577 0.696 1.21 1.68 0.442
## 6 Rolls 0.658 0.568 0.863 0.682 0.482
## 7 Brown.bread 0.583 0.667 1.14 1.43 0.622
## 8 Milk 0.62 0.5 0.807 0.614 0.638
## 9 Coffee 0.614 0.613 0.999 0.997 1
## 10 Water 0.647 0.542 0.837 0.645 0.45
## 11 Cakes 0.543 0.675 1.24 1.75 0.342
## 12 Vanilla.ice.cream 0.143 0.796 5.57 23.5 2.6e-07
## 13 Chocolate.ice.cream 0.741 0.532 0.718 0.398 0.0891
## 14 Fruit.salad 0.609 0.667 1.1 1.29 1
Vanilla ice cream should be the source of illness (has a significantly high RR and OR).
##Exercise 5.6.
outbreak <- matrix(NA, length(food.item), 5)
rownames(outbreak) <- food.item
colnames(outbreak) <- c('R_0','R_1','RR','OR','P value')
for(i in 1:length(food.item)){
outbreak[i,] <- epitable(oswego2[, food.item[i]], oswego2$Ill)$results
}
outbreak
## R_0 R_1 RR OR P value
## Baked.ham 0.586 0.630 1.080 1.200 8.09e-01
## Spinach 0.625 0.605 0.967 0.918 1.00e+00
## Mashed.potato 0.622 0.622 1.000 1.000 1.00e+00
## Cabbage.salad 0.596 0.643 1.080 1.220 8.08e-01
## Jello 0.577 0.696 1.210 1.680 4.42e-01
## Rolls 0.658 0.568 0.863 0.682 4.82e-01
## Brown.bread 0.583 0.667 1.140 1.430 6.22e-01
## Milk 0.620 0.500 0.807 0.614 6.38e-01
## Coffee 0.614 0.613 0.999 0.997 1.00e+00
## Water 0.647 0.542 0.837 0.645 4.50e-01
## Cakes 0.543 0.675 1.240 1.750 3.42e-01
## Vanilla.ice.cream 0.143 0.796 5.570 23.500 2.60e-07
## Chocolate.ice.cream 0.741 0.532 0.718 0.398 8.91e-02
## Fruit.salad 0.609 0.667 1.100 1.290 1.00e+00
##Exercise 5.7.
myepitab <- function(x){
epitable(x, oswego2$Ill)$results
}
lapply(oswego2[,food.item], myepitab) #each element of oswego2[,food.item], which is basically the data for each food.item, is applied to the function myepitab, and shown as a list.
## $Baked.ham
## R_0 R_1 RR OR p.value
## 0.586 0.630 1.080 1.200 0.809
##
## $Spinach
## R_0 R_1 RR OR p.value
## 0.625 0.605 0.967 0.918 1.000
##
## $Mashed.potato
## R_0 R_1 RR OR p.value
## 0.622 0.622 1.000 1.000 1.000
##
## $Cabbage.salad
## R_0 R_1 RR OR p.value
## 0.596 0.643 1.080 1.220 0.808
##
## $Jello
## R_0 R_1 RR OR p.value
## 0.577 0.696 1.210 1.680 0.442
##
## $Rolls
## R_0 R_1 RR OR p.value
## 0.658 0.568 0.863 0.682 0.482
##
## $Brown.bread
## R_0 R_1 RR OR p.value
## 0.583 0.667 1.140 1.430 0.622
##
## $Milk
## R_0 R_1 RR OR p.value
## 0.620 0.500 0.807 0.614 0.638
##
## $Coffee
## R_0 R_1 RR OR p.value
## 0.614 0.613 0.999 0.997 1.000
##
## $Water
## R_0 R_1 RR OR p.value
## 0.647 0.542 0.837 0.645 0.450
##
## $Cakes
## R_0 R_1 RR OR p.value
## 0.543 0.675 1.240 1.750 0.342
##
## $Vanilla.ice.cream
## R_0 R_1 RR OR p.value
## 1.43e-01 7.96e-01 5.57e+00 2.35e+01 2.60e-07
##
## $Chocolate.ice.cream
## R_0 R_1 RR OR p.value
## 0.7410 0.5320 0.7180 0.3980 0.0891
##
## $Fruit.salad
## R_0 R_1 RR OR p.value
## 0.609 0.667 1.100 1.290 1.000
t(sapply(oswego2[,food.item], myepitab)) #wraps the results of lapply into a matrix.
## R_0 R_1 RR OR p.value
## Baked.ham 0.586 0.630 1.080 1.200 8.09e-01
## Spinach 0.625 0.605 0.967 0.918 1.00e+00
## Mashed.potato 0.622 0.622 1.000 1.000 1.00e+00
## Cabbage.salad 0.596 0.643 1.080 1.220 8.08e-01
## Jello 0.577 0.696 1.210 1.680 4.42e-01
## Rolls 0.658 0.568 0.863 0.682 4.82e-01
## Brown.bread 0.583 0.667 1.140 1.430 6.22e-01
## Milk 0.620 0.500 0.807 0.614 6.38e-01
## Coffee 0.614 0.613 0.999 0.997 1.00e+00
## Water 0.647 0.542 0.837 0.645 4.50e-01
## Cakes 0.543 0.675 1.240 1.750 3.42e-01
## Vanilla.ice.cream 0.143 0.796 5.570 23.500 2.60e-07
## Chocolate.ice.cream 0.741 0.532 0.718 0.398 8.91e-02
## Fruit.salad 0.609 0.667 1.100 1.290 1.00e+00
lapply returns a list of the same length as X, while sapply wraps the lapply into a matrix.
Exercise 5.8.
a <- 1
b <- 2
f <- function(x){
a * x + b
}
g <- function(x){
a <- 2
b <- 1
f(x)
}
g(2)
## [1] 4
The result equals 4 because the function g(2) would first go to f(2), and since a and b are free variables in function f, R needs to go to global envrionment to find the numbers assigned to a and b, which are a = 1 and b = 2.
##Exercise 5.9.
g <- function(x){
a <- 2
b <- 1
f <- function(x){
a * x + b
}
f(x)
}
g(2)
## [1] 5
The result equals to 5 because in the function f(), a and b are free variables. R would then look in the parent envrionment in which a = 2 and b = 1.