#Exercise 5.1
replicates <- 5000
num_of_events <- 289 # recovered
num_at_risk <- 350
risk <- num_of_events/num_at_risk
sim_events <- rbinom(n = replicates, size = num_at_risk, prob = risk)
conf.level <- 0.95
alpha <- 1 - conf.level
# 0.95 CI for binomial counts
quantile(sim_events, c(alpha/2, 1-alpha/2))
## 2.5% 97.5%
## 274 303
# 0.95 CI for binomial proportions
quantile(sim_events, c(alpha/2, 1-alpha/2))/num_at_risk
## 2.5% 97.5%
## 0.7828571 0.8657143
#Exercise 5.2
risk.boot <- function(x, n, conf.level = 0.95, replicates = 5000){
# x = number of events (numerator)
# n = number at risk (denominator)
# conf.level = confidence level (default 0.95)
# replicates = number of simulations
# prepare inputs
num_of_events <- x
num_at_risk <- n
# do calculations
risk <- num_of_events/num_at_risk
sim_events <- rbinom(n = replicates, size = num_at_risk, prob = risk)
conf.level <- conf.level
alpha <- 1 - conf.level
ci <- quantile(sim_events, c(alpha/2, 1-alpha/2))
# collect results
list(x = num_of_events,
n = num_at_risk,
conf.int = ci,
conf.level = conf.level)
}
# test
risk.boot(10, 20)
## $x
## [1] 10
##
## $n
## [1] 20
##
## $conf.int
## 2.5% 97.5%
## 6 14
##
## $conf.level
## [1] 0.95
#Exercise 5.3
myRR <- function(x, conf.level = 0.95){
## x is 2x2 table with this layout
## |Outcome| Exposed |
## | | Yes | No | Sum |
## |-------+-----+-----+-----|
## | Yes | a | b | M_1 |
## | No | c | d | M_0 |
## |-------+-----+-----|-----|
## | Sum | N_1 | N_0 | T |
##
## Prepare input
tab <- addmargins(x)
aa <- tab[1, 1]
bb <- tab[1, 2]
N1 <- tab['Sum',1]
N0 <- tab['Sum',2]
## Do calculations
R1 <- aa/N1
R0 <- bb/N0
RR <- R1/R0
Z <- qnorm((1 + conf.level)/2)
logRR <- log(RR)
SE_logRR <- sqrt(1/aa - 1/N1 + 1/bb - 1/N0)
CI <- exp(logRR + c(-1, 1) * Z * SE_logRR)
pv <- fisher.test(tab[1:2,1:2])$p.value
## Collect results
list(data = tab,
risks = prop.table(x, 2),
risk.ratio = RR,
conf.level = conf.level,
conf.int = CI,
p.value = pv)
}
## test function
tab <- matrix(c(273, 289, 77, 61), 2, 2, byrow = TRUE,
dimnames = list (Recovered = c('Yes', 'No'),
Treatment = c('Yes', 'No')))
myRR(tab)
## $data
## Treatment
## Recovered Yes No Sum
## Yes 273 289 562
## No 77 61 138
## Sum 350 350 700
##
## $risks
## Treatment
## Recovered Yes No
## Yes 0.78 0.8257143
## No 0.22 0.1742857
##
## $risk.ratio
## [1] 0.9446367
##
## $conf.level
## [1] 0.95
##
## $conf.int
## [1] 0.8776359 1.0167524
##
## $p.value
## [1] 0.1539742
#Exercise # 5.4
url <- "https://raw.githubusercontent.com/taragonmd/data/master/exported/jp-drugrx-p2.txt"
drugrx <- read.csv(url)
str(drugrx)
## 'data.frame': 700 obs. of 4 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Recovered: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Drug : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Gender : Factor w/ 2 levels "Men","Women": 1 1 1 1 1 1 1 1 1 1 ...
ALL
tab_all <- xtabs(~ Recovered + Drug, data = drugrx)[2:1,2:1]
myRR(tab_all)
## $data
## Drug
## Recovered Yes No Sum
## Yes 273 289 562
## No 77 61 138
## Sum 350 350 700
##
## $risks
## Drug
## Recovered Yes No
## Yes 0.7800000 0.8257143
## No 0.2200000 0.1742857
##
## $risk.ratio
## [1] 0.9446367
##
## $conf.level
## [1] 0.95
##
## $conf.int
## [1] 0.8776359 1.0167524
##
## $p.value
## [1] 0.1539742
MEN
tab_men <- xtabs(~ Recovered + Drug + Gender, data = drugrx)[2:1,2:1,'Men']
myRR(tab_men)
## $data
## Drug
## Recovered Yes No Sum
## Yes 81 234 315
## No 6 36 42
## Sum 87 270 357
##
## $risks
## Drug
## Recovered Yes No
## Yes 0.93103448 0.86666667
## No 0.06896552 0.13333333
##
## $risk.ratio
## [1] 1.074271
##
## $conf.level
## [1] 0.95
##
## $conf.int
## [1] 0.9977554 1.1566534
##
## $p.value
## [1] 0.126632
WOMEN
tab_women <- xtabs(~ Recovered + Drug + Gender, data = drugrx)[2:1,2:1,'Women']
myRR(tab_women)
## $data
## Drug
## Recovered Yes No Sum
## Yes 192 55 247
## No 71 25 96
## Sum 263 80 343
##
## $risks
## Drug
## Recovered Yes No
## Yes 0.730038 0.687500
## No 0.269962 0.312500
##
## $risk.ratio
## [1] 1.061873
##
## $conf.level
## [1] 0.95
##
## $conf.int
## [1] 0.9003483 1.2523768
##
## $p.value
## [1] 0.4786164
#Excercise 5.5
oswego2 <- read.csv("https://raw.githubusercontent.com/taragonmd/data/master/oswego/oswego2.txt", strip.white = TRUE)
food.item <- c("Baked.ham", "Spinach", "Mashed.potato", "Cabbage.salad",
"Jello", "Rolls", "Brown.bread", "Milk", "Coffee", "Water", "Cakes",
"Vanilla.ice.cream", "Chocolate.ice.cream", "Fruit.salad")
names(oswego2)[9:22] <- food.item
str(oswego2)
## 'data.frame': 75 obs. of 22 variables:
## $ ID : int 2 3 4 6 7 8 9 10 14 16 ...
## $ Age : int 52 65 59 63 70 40 15 33 10 32 ...
## $ Sex : Factor w/ 2 levels "F","M": 1 2 1 1 2 1 1 1 2 1 ...
## $ MealDate : Factor w/ 1 level "04/18/1940": 1 1 1 1 1 1 1 1 1 1 ...
## $ MealTime : Factor w/ 6 levels "11:00:00","18:30:00",..: 5 2 2 4 4 4 6 3 4 NA ...
## $ Ill : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 2 2 2 2 ...
## $ OnsetDate : Factor w/ 2 levels "4/18/1940","4/19/1940": 2 2 2 1 1 2 2 1 2 2 ...
## $ OnsetTime : Factor w/ 17 levels "00:00:00","00:30:00",..: 2 2 2 15 15 4 3 16 4 7 ...
## $ Baked.ham : Factor w/ 2 levels "N","Y": 2 2 2 2 2 1 1 2 1 2 ...
## $ Spinach : Factor w/ 2 levels "N","Y": 2 2 2 2 2 1 1 2 1 2 ...
## $ Mashed.potato : Factor w/ 2 levels "N","Y": 2 2 1 1 2 1 1 2 1 1 ...
## $ Cabbage.salad : Factor w/ 2 levels "N","Y": 1 2 1 2 1 1 1 1 1 1 ...
## $ Jello : Factor w/ 2 levels "N","Y": 1 1 1 2 2 1 1 1 1 1 ...
## $ Rolls : Factor w/ 2 levels "N","Y": 2 1 1 1 2 1 1 2 1 2 ...
## $ Brown.bread : Factor w/ 2 levels "N","Y": 1 1 1 1 2 1 1 2 1 1 ...
## $ Milk : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
## $ Coffee : Factor w/ 2 levels "N","Y": 2 2 2 1 2 1 1 1 1 2 ...
## $ Water : Factor w/ 2 levels "N","Y": 1 1 1 2 2 1 1 2 1 1 ...
## $ Cakes : Factor w/ 2 levels "N","Y": 1 1 2 1 1 1 2 1 1 2 ...
## $ Vanilla.ice.cream : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 1 2 2 2 ...
## $ Chocolate.ice.cream: Factor w/ 2 levels "N","Y": 1 2 2 1 1 2 2 2 2 2 ...
## $ Fruit.salad : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
head(oswego2)
## ID Age Sex MealDate MealTime Ill OnsetDate OnsetTime Baked.ham Spinach
## 1 2 52 F 04/18/1940 20:00:00 Y 4/19/1940 00:30:00 Y Y
## 2 3 65 M 04/18/1940 18:30:00 Y 4/19/1940 00:30:00 Y Y
## 3 4 59 F 04/18/1940 18:30:00 Y 4/19/1940 00:30:00 Y Y
## 4 6 63 F 04/18/1940 19:30:00 Y 4/18/1940 22:30:00 Y Y
## 5 7 70 M 04/18/1940 19:30:00 Y 4/18/1940 22:30:00 Y Y
## 6 8 40 F 04/18/1940 19:30:00 Y 4/19/1940 02:00:00 N N
## Mashed.potato Cabbage.salad Jello Rolls Brown.bread Milk Coffee Water
## 1 Y N N Y N N Y N
## 2 Y Y N N N N Y N
## 3 N N N N N N Y N
## 4 N Y Y N N N N Y
## 5 Y N Y Y Y N Y Y
## 6 N N N N N N N N
## Cakes Vanilla.ice.cream Chocolate.ice.cream Fruit.salad
## 1 N Y N N
## 2 N Y Y N
## 3 Y Y Y N
## 4 N Y N N
## 5 N Y N N
## 6 N Y Y N
#Ex 5.5
tab1 = xtabs(~ Ill + Spinach, data = oswego2)
tab1
## Spinach
## Ill N Y
## N 12 17
## Y 20 26
epitable <- function(exposure, outcome, digits = 3, ...){
x <- xtabs(~ exposure + outcome, na.action=na.pass)
rowtot <- apply(x, 1, sum)
rowdist <- sweep(x, 1, rowtot, '/')
risk0 <- rowdist['N','Y']
risk1 <- rowdist['Y','Y']
rr <- risk1/risk0
or <- (risk1/(1-risk1)) / (risk0/(1-risk0))
ft <- fisher.test(x, ...)
pv <- ft$p.value
results <- signif(c(risk0, risk1, rr, or, pv), digits = digits)
names(results) <- c('R_0', 'R_1', 'RR', 'OR', 'p.value')
list(data = x, results = results)
}
epitable(oswego2$Baked.ham, oswego2$Ill)
## $data
## outcome
## exposure N Y
## N 12 17
## Y 17 29
##
## $results
## R_0 R_1 RR OR p.value
## 0.586 0.630 1.080 1.200 0.809
epitable(oswego2$Spinach, oswego2$Ill)
## $data
## outcome
## exposure N Y
## N 12 20
## Y 17 26
##
## $results
## R_0 R_1 RR OR p.value
## 0.625 0.605 0.967 0.918 1.000
epitable(oswego2$Mashed.potato, oswego2$Ill)
## $data
## outcome
## exposure N Y
## N 14 23
## Y 14 23
##
## $results
## R_0 R_1 RR OR p.value
## 0.622 0.622 1.000 1.000 1.000
epitable(oswego2$Cabbage.salad, oswego2$Ill)
## $data
## outcome
## exposure N Y
## N 19 28
## Y 10 18
##
## $results
## R_0 R_1 RR OR p.value
## 0.596 0.643 1.080 1.220 0.808
epitable(oswego2$Jello, oswego2$Ill)
## $data
## outcome
## exposure N Y
## N 22 30
## Y 7 16
##
## $results
## R_0 R_1 RR OR p.value
## 0.577 0.696 1.210 1.680 0.442
epitable(oswego2$Rolls, oswego2$Ill)
## $data
## outcome
## exposure N Y
## N 13 25
## Y 16 21
##
## $results
## R_0 R_1 RR OR p.value
## 0.658 0.568 0.863 0.682 0.482
epitable(oswego2$Milk, oswego2$Ill)
## $data
## outcome
## exposure N Y
## N 27 44
## Y 2 2
##
## $results
## R_0 R_1 RR OR p.value
## 0.620 0.500 0.807 0.614 0.638
epitable(oswego2$Coffee, oswego2$Ill)
## $data
## outcome
## exposure N Y
## N 17 27
## Y 12 19
##
## $results
## R_0 R_1 RR OR p.value
## 0.614 0.613 0.999 0.997 1.000
epitable(oswego2$Water, oswego2$Ill)
## $data
## outcome
## exposure N Y
## N 18 33
## Y 11 13
##
## $results
## R_0 R_1 RR OR p.value
## 0.647 0.542 0.837 0.645 0.450
epitable(oswego2$Cakes, oswego2$Ill)
## $data
## outcome
## exposure N Y
## N 16 19
## Y 13 27
##
## $results
## R_0 R_1 RR OR p.value
## 0.543 0.675 1.240 1.750 0.342
epitable(oswego2$Vanilla.ice.cream, oswego2$Ill)
## $data
## outcome
## exposure N Y
## N 18 3
## Y 11 43
##
## $results
## R_0 R_1 RR OR p.value
## 1.43e-01 7.96e-01 5.57e+00 2.35e+01 2.60e-07
epitable(oswego2$Chocolate.ice.cream, oswego2$Ill)
## $data
## outcome
## exposure N Y
## N 7 20
## Y 22 25
##
## $results
## R_0 R_1 RR OR p.value
## 0.7410 0.5320 0.7180 0.3980 0.0891
epitable(oswego2$Fruit.salad, oswego2$Ill)
## $data
## outcome
## exposure N Y
## N 27 42
## Y 2 4
##
## $results
## R_0 R_1 RR OR p.value
## 0.609 0.667 1.100 1.290 1.000
#Exercise 5.6
outbreak <- matrix(NA, length(food.item), 5)
rownames(outbreak) <- food.item
colnames(outbreak) <- c('R_0','R_1','RR','OR','P value')
for(i in 1:length(food.item)) {
outbreak[i,] <- epitable(oswego2[,food.item[i]], oswego2$Ill)$results
}
outbreak
## R_0 R_1 RR OR P value
## Baked.ham 0.586 0.630 1.080 1.200 8.09e-01
## Spinach 0.625 0.605 0.967 0.918 1.00e+00
## Mashed.potato 0.622 0.622 1.000 1.000 1.00e+00
## Cabbage.salad 0.596 0.643 1.080 1.220 8.08e-01
## Jello 0.577 0.696 1.210 1.680 4.42e-01
## Rolls 0.658 0.568 0.863 0.682 4.82e-01
## Brown.bread 0.583 0.667 1.140 1.430 6.22e-01
## Milk 0.620 0.500 0.807 0.614 6.38e-01
## Coffee 0.614 0.613 0.999 0.997 1.00e+00
## Water 0.647 0.542 0.837 0.645 4.50e-01
## Cakes 0.543 0.675 1.240 1.750 3.42e-01
## Vanilla.ice.cream 0.143 0.796 5.570 23.500 2.60e-07
## Chocolate.ice.cream 0.741 0.532 0.718 0.398 8.91e-02
## Fruit.salad 0.609 0.667 1.100 1.290 1.00e+00
#Exercise 5.7
myepitab <- function(x) {
epitable(x, oswego2$Ill)$results
}
lapply(oswego2[, food.item], myepitab)
## $Baked.ham
## R_0 R_1 RR OR p.value
## 0.586 0.630 1.080 1.200 0.809
##
## $Spinach
## R_0 R_1 RR OR p.value
## 0.625 0.605 0.967 0.918 1.000
##
## $Mashed.potato
## R_0 R_1 RR OR p.value
## 0.622 0.622 1.000 1.000 1.000
##
## $Cabbage.salad
## R_0 R_1 RR OR p.value
## 0.596 0.643 1.080 1.220 0.808
##
## $Jello
## R_0 R_1 RR OR p.value
## 0.577 0.696 1.210 1.680 0.442
##
## $Rolls
## R_0 R_1 RR OR p.value
## 0.658 0.568 0.863 0.682 0.482
##
## $Brown.bread
## R_0 R_1 RR OR p.value
## 0.583 0.667 1.140 1.430 0.622
##
## $Milk
## R_0 R_1 RR OR p.value
## 0.620 0.500 0.807 0.614 0.638
##
## $Coffee
## R_0 R_1 RR OR p.value
## 0.614 0.613 0.999 0.997 1.000
##
## $Water
## R_0 R_1 RR OR p.value
## 0.647 0.542 0.837 0.645 0.450
##
## $Cakes
## R_0 R_1 RR OR p.value
## 0.543 0.675 1.240 1.750 0.342
##
## $Vanilla.ice.cream
## R_0 R_1 RR OR p.value
## 1.43e-01 7.96e-01 5.57e+00 2.35e+01 2.60e-07
##
## $Chocolate.ice.cream
## R_0 R_1 RR OR p.value
## 0.7410 0.5320 0.7180 0.3980 0.0891
##
## $Fruit.salad
## R_0 R_1 RR OR p.value
## 0.609 0.667 1.100 1.290 1.000
t(sapply(oswego2[, food.item], myepitab)) # transpose output
## R_0 R_1 RR OR p.value
## Baked.ham 0.586 0.630 1.080 1.200 8.09e-01
## Spinach 0.625 0.605 0.967 0.918 1.00e+00
## Mashed.potato 0.622 0.622 1.000 1.000 1.00e+00
## Cabbage.salad 0.596 0.643 1.080 1.220 8.08e-01
## Jello 0.577 0.696 1.210 1.680 4.42e-01
## Rolls 0.658 0.568 0.863 0.682 4.82e-01
## Brown.bread 0.583 0.667 1.140 1.430 6.22e-01
## Milk 0.620 0.500 0.807 0.614 6.38e-01
## Coffee 0.614 0.613 0.999 0.997 1.00e+00
## Water 0.647 0.542 0.837 0.645 4.50e-01
## Cakes 0.543 0.675 1.240 1.750 3.42e-01
## Vanilla.ice.cream 0.143 0.796 5.570 23.500 2.60e-07
## Chocolate.ice.cream 0.741 0.532 0.718 0.398 8.91e-02
## Fruit.salad 0.609 0.667 1.100 1.290 1.00e+00
#Exercise 5.8
a <- 1
b <- 2
f <- function(x) {
a * x + b
}
g <- function(x) {
a <- 2
b <- 1
f(x)
}
g(2)
## [1] 4
#Exercise 5.9
g <- function(x) {
a <- 2
b <- 1
f <- function(x) {
a * x + b
}
f(x)
}
g(2)
## [1] 5