Calculates the risks R1 and R0. Calculates the risk ratio (RR). Calculates 95% confidence intervals. Uses the fisher.test function (Fisher’s Exact Test) to calculate the two-sided p-value of the null hypothesis of no association. Collects and returns all the results as a ‘list’.#
d <- c(273, 77, 289, 61)
mtx1 <- matrix(d, 2, 2); mtx1
## [,1] [,2]
## [1,] 273 289
## [2,] 77 61
rownames(mtx1) <- c('Survive=Yes', 'Survive=No')
colnames(mtx1) <- c('Treatment=Yes', 'Treatment=No')
mtx1
## Treatment=Yes Treatment=No
## Survive=Yes 273 289
## Survive=No 77 61
rowtot <-apply(mtx1, 1, sum) #row totals
Matrix <- cbind(mtx1, rowtot)
coltot <- apply(Matrix, 2, sum) #column totals
Matrix <- rbind(Matrix, coltot)
Matrix
## Treatment=Yes Treatment=No rowtot
## Survive=Yes 273 289 562
## Survive=No 77 61 138
## coltot 350 350 700
myFunction <- function(x, conf.level){
if(missing(conf.level)) stop("Must specify confidence level")
aa = x[1, 1]
bb = x[1, 2]
cc = x[2, 1]
dd = x[2, 2]
#Calculates the risks R1 and R0.
Risk.Treatment = cc/(aa+cc)
Risk.NoTreat = dd/(bb+dd)
#Calculates the risk ratio (RR).
RR = Risk.Treatment/Risk.NoTreat
#Calculates 95% confidence intervals.
Z <- qnorm((1 + conf.level)/2)
logOR <- log((aa*dd)/(bb*cc))
SE.logOR <- sqrt(1/aa + 1/bb + 1/cc + 1/dd)
OR <- exp(logOR)
CI <- exp(logOR + c(-1, 1)*Z*SE.logOR)
#Uses the fisher.test function (Fisher’s Exact Test) to calculate the two-sided p-value of the null hypothesis of no association.
# ft <- fisher.test(x, ...)
# pv <- ft$p.value
#Collects and returns all the results as a ‘list’.#
list(data = x, TreatmentRisk=Risk.Treatment, NoneTreatmentRisk=Risk.NoTreat, RiskRatio=RR, ConfInt=CI) #, Fisher=pv
}
myFunction(Matrix, 0.95)
## $data
## Treatment=Yes Treatment=No rowtot
## Survive=Yes 273 289 562
## Survive=No 77 61 138
## coltot 350 350 700
##
## $TreatmentRisk
## [1] 0.22
##
## $NoneTreatmentRisk
## [1] 0.1742857
##
## $RiskRatio
## [1] 1.262295
##
## $ConfInt
## [1] 0.5146049 1.0882631
drugrx.df <- read.csv("http://www.medepi.net/data/jp-drugrx-p2.txt")
mode(drugrx.df)
## [1] "list"
str(drugrx.df)
## 'data.frame': 700 obs. of 4 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Recovered: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Drug : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Gender : Factor w/ 2 levels "Men","Women": 1 1 1 1 1 1 1 1 1 1 ...
drugrxxtab <- xtabs(~Drug + Recovered, data = drugrx.df)
drugrxxtab
## Recovered
## Drug No Yes
## No 61 289
## Yes 77 273
myFunction(drugrxxtab, 0.95)
## $data
## Recovered
## Drug No Yes
## No 61 289
## Yes 77 273
##
## $TreatmentRisk
## [1] 0.557971
##
## $NoneTreatmentRisk
## [1] 0.4857651
##
## $RiskRatio
## [1] 1.148644
##
## $ConfInt
## [1] 0.5146049 1.0882631
drugrxF <- subset(drugrx.df, subset = {Gender == 'Women'})
drugrxxtabF <- xtabs(~Drug + Recovered, data = drugrxF)
myFunction(drugrxxtabF, 0.95)
## $data
## Recovered
## Drug No Yes
## No 25 55
## Yes 71 192
##
## $TreatmentRisk
## [1] 0.7395833
##
## $NoneTreatmentRisk
## [1] 0.7773279
##
## $RiskRatio
## [1] 0.9514431
##
## $ConfInt
## [1] 0.7123517 2.1210257
drugrxM <- subset(drugrx.df, subset = {Gender == 'Men'})
drugrxxtabM <- xtabs(~Drug + Recovered, data = drugrxM)
myFunction(drugrxxtabM, 0.95)
## $data
## Recovered
## Drug No Yes
## No 36 234
## Yes 6 81
##
## $TreatmentRisk
## [1] 0.1428571
##
## $NoneTreatmentRisk
## [1] 0.2571429
##
## $RiskRatio
## [1] 0.5555556
##
## $ConfInt
## [1] 0.8440424 5.1106548
Food item R0 R1 RR (risk ratio) OR (odds ratio) Fisher p value Discuss your findings.
oswego2 <- read.csv("http://www.medepi.net/data/oswego/oswego2.txt", strip.white = TRUE)
food.item <- c("Baked.ham", "Spinach", "Mashed.potato", "Cabbage.salad", "Jello", "Rolls", "Brown.bread", "Milk", "Coffee", "Water", "Cakes", "Vanilla.ice.cream", "Chocolate.ice.cream", "Fruit.salad")
names(oswego2)[9:22] <- food.item
str(oswego2)
## 'data.frame': 75 obs. of 22 variables:
## $ ID : int 2 3 4 6 7 8 9 10 14 16 ...
## $ Age : int 52 65 59 63 70 40 15 33 10 32 ...
## $ Sex : Factor w/ 2 levels "F","M": 1 2 1 1 2 1 1 1 2 1 ...
## $ MealDate : Factor w/ 1 level "04/18/1940": 1 1 1 1 1 1 1 1 1 1 ...
## $ MealTime : Factor w/ 6 levels "11:00:00","18:30:00",..: 5 2 2 4 4 4 6 3 4 NA ...
## $ Ill : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 2 2 2 2 ...
## $ OnsetDate : Factor w/ 2 levels "4/18/1940","4/19/1940": 2 2 2 1 1 2 2 1 2 2 ...
## $ OnsetTime : Factor w/ 17 levels "00:00:00","00:30:00",..: 2 2 2 15 15 4 3 16 4 7 ...
## $ Baked.ham : Factor w/ 2 levels "N","Y": 2 2 2 2 2 1 1 2 1 2 ...
## $ Spinach : Factor w/ 2 levels "N","Y": 2 2 2 2 2 1 1 2 1 2 ...
## $ Mashed.potato : Factor w/ 2 levels "N","Y": 2 2 1 1 2 1 1 2 1 1 ...
## $ Cabbage.salad : Factor w/ 2 levels "N","Y": 1 2 1 2 1 1 1 1 1 1 ...
## $ Jello : Factor w/ 2 levels "N","Y": 1 1 1 2 2 1 1 1 1 1 ...
## $ Rolls : Factor w/ 2 levels "N","Y": 2 1 1 1 2 1 1 2 1 2 ...
## $ Brown.bread : Factor w/ 2 levels "N","Y": 1 1 1 1 2 1 1 2 1 1 ...
## $ Milk : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
## $ Coffee : Factor w/ 2 levels "N","Y": 2 2 2 1 2 1 1 1 1 2 ...
## $ Water : Factor w/ 2 levels "N","Y": 1 1 1 2 2 1 1 2 1 1 ...
## $ Cakes : Factor w/ 2 levels "N","Y": 1 1 2 1 1 1 2 1 1 2 ...
## $ Vanilla.ice.cream : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 1 2 2 2 ...
## $ Chocolate.ice.cream: Factor w/ 2 levels "N","Y": 1 2 2 1 1 2 2 2 2 2 ...
## $ Fruit.salad : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
epitable <- function(exposure, outcome, ...){
#### Updated 2016-10-16
#### 2x2 table from 'xtabs' function
#### row names are exposure status
#### col names are illness outcome
#### ... = options to fisher.test functions
x <- xtabs(~ exposure + outcome, na.action=na.pass)
rowtot <- apply(x, 1, sum)
rowdist <- sweep(x, 1, rowtot, '/')
risk0 <- rowdist['N','Y']
risk1 <- rowdist['Y','Y']
rr <- risk1/risk0
or <- (risk1/(1-risk1)) / (risk0/(1-risk0))
ft <- fisher.test(x, ...)
pv <- ft$p.value
results <- unname(round(c(risk0, risk1, rr, or, pv), 2))
names(results) <- c('R0', 'R1', 'RR', 'OR', 'p.value')
list(data = x, results = results)
}
epitable(oswego2$Baked.ham, oswego2$Ill)
## $data
## outcome
## exposure N Y
## N 12 17
## Y 17 29
##
## $results
## R0 R1 RR OR p.value
## 0.59 0.63 1.08 1.20 0.81
I would only ever do this as a loop, not one at a time. I know better than that ;)
outbreak <- matrix(NA, length(food.item), 5)
rownames(outbreak) <- food.item
colnames(outbreak) <- c('R.0','R.1','RR','OR','P value')
for(i in 1:length(food.item)) {outbreak[i,] <- epitable(oswego2[,food.item[i]], oswego2$Ill)$results}
outbreak
## R.0 R.1 RR OR P value
## Baked.ham 0.59 0.63 1.08 1.20 0.81
## Spinach 0.62 0.60 0.97 0.92 1.00
## Mashed.potato 0.62 0.62 1.00 1.00 1.00
## Cabbage.salad 0.60 0.64 1.08 1.22 0.81
## Jello 0.58 0.70 1.21 1.68 0.44
## Rolls 0.66 0.57 0.86 0.68 0.48
## Brown.bread 0.58 0.67 1.14 1.43 0.62
## Milk 0.62 0.50 0.81 0.61 0.64
## Coffee 0.61 0.61 1.00 1.00 1.00
## Water 0.65 0.54 0.84 0.64 0.45
## Cakes 0.54 0.68 1.24 1.75 0.34
## Vanilla.ice.cream 0.14 0.80 5.57 23.45 0.00
## Chocolate.ice.cream 0.74 0.53 0.72 0.40 0.09
## Fruit.salad 0.61 0.67 1.10 1.29 1.00
lapply(oswego2[,food.item], function(x) epitable(x, oswego2$Ill)$results)
## $Baked.ham
## R0 R1 RR OR p.value
## 0.59 0.63 1.08 1.20 0.81
##
## $Spinach
## R0 R1 RR OR p.value
## 0.62 0.60 0.97 0.92 1.00
##
## $Mashed.potato
## R0 R1 RR OR p.value
## 0.62 0.62 1.00 1.00 1.00
##
## $Cabbage.salad
## R0 R1 RR OR p.value
## 0.60 0.64 1.08 1.22 0.81
##
## $Jello
## R0 R1 RR OR p.value
## 0.58 0.70 1.21 1.68 0.44
##
## $Rolls
## R0 R1 RR OR p.value
## 0.66 0.57 0.86 0.68 0.48
##
## $Brown.bread
## R0 R1 RR OR p.value
## 0.58 0.67 1.14 1.43 0.62
##
## $Milk
## R0 R1 RR OR p.value
## 0.62 0.50 0.81 0.61 0.64
##
## $Coffee
## R0 R1 RR OR p.value
## 0.61 0.61 1.00 1.00 1.00
##
## $Water
## R0 R1 RR OR p.value
## 0.65 0.54 0.84 0.64 0.45
##
## $Cakes
## R0 R1 RR OR p.value
## 0.54 0.68 1.24 1.75 0.34
##
## $Vanilla.ice.cream
## R0 R1 RR OR p.value
## 0.14 0.80 5.57 23.45 0.00
##
## $Chocolate.ice.cream
## R0 R1 RR OR p.value
## 0.74 0.53 0.72 0.40 0.09
##
## $Fruit.salad
## R0 R1 RR OR p.value
## 0.61 0.67 1.10 1.29 1.00
“lapply - When you want to apply a function to each element of a list in turn and get a list back.” -http://stackoverflow.com/questions/3505701/r-grouping-functions-sapply-vs-lapply-vs-apply-vs-tapply-vs-by-vs-aggrega
t(sapply(oswego2[,food.item], function(x) epitable(x, oswego2$Ill)$results))
## R0 R1 RR OR p.value
## Baked.ham 0.59 0.63 1.08 1.20 0.81
## Spinach 0.62 0.60 0.97 0.92 1.00
## Mashed.potato 0.62 0.62 1.00 1.00 1.00
## Cabbage.salad 0.60 0.64 1.08 1.22 0.81
## Jello 0.58 0.70 1.21 1.68 0.44
## Rolls 0.66 0.57 0.86 0.68 0.48
## Brown.bread 0.58 0.67 1.14 1.43 0.62
## Milk 0.62 0.50 0.81 0.61 0.64
## Coffee 0.61 0.61 1.00 1.00 1.00
## Water 0.65 0.54 0.84 0.64 0.45
## Cakes 0.54 0.68 1.24 1.75 0.34
## Vanilla.ice.cream 0.14 0.80 5.57 23.45 0.00
## Chocolate.ice.cream 0.74 0.53 0.72 0.40 0.09
## Fruit.salad 0.61 0.67 1.10 1.29 1.00
“sapply - When you want to apply a function to each element of a list in turn, but you want a vector back, rather than a list. … if our function returns vectors of the same length, sapply will use them as columns of a matrix.” -http://stackoverflow.com/questions/3505701/r-grouping-functions-sapply-vs-lapply-vs-apply-vs-tapply-vs-by-vs-aggrega
a = 2
b = 3
f <- function(x) {
a * x + b
}
g <- function(x) {
a = 2
b = 4
f(x)
}
g(2)
## [1] 7
f(2)
## [1] 7
g(5)
## [1] 13
f(5)
## [1] 13
They both appear to do the same thing and have the same outcome. I’m not sure what I’m supposed to get out of this? Sorry.
g <- function(x) {
a = 2
b = 1
f <- function(x) {
a * x + b
}
f(x)
}
g(2)
## [1] 5
This is a nested version of the same code above - instead of running 2 seperate functions, runs one nested function.