Problem 1
# Creating function
riskfunction<- function(x){
a = x[1, 1]
b = x[1, 2]
c = x[2, 1]
d = x[2, 2]
N1<-a+c
N0<-b+d
M1<-a+b
M0<-c+d
TT<-a+b+c+d
R1<-a/N1
R0<-b/N0
RR<-R1/R0
SE.lnRR<-sqrt(1/a-1/N1+1/b-1/N0)
Z <- qnorm((1 +0.95)/2)
CI<-exp(log(RR)+c(-1,1)*Z*SE.lnRR)
fisher<- fisher.test(x, workspace=2e+07)
list(
Data = cbind(rbind(x,c(N1,N0)),c(M1,M0,TT)),
R1=R1,
R0=R0,
Risk.ratio =RR,
ConfidenceIntervals.95 = CI,
FisherTest<-fisher
)
}
table_5.3<-rbind(c(273,289),c(77,61))
riskfunction(table_5.3)
## $Data
## [,1] [,2] [,3]
## [1,] 273 289 562
## [2,] 77 61 138
## [3,] 350 350 700
##
## $R1
## [1] 0.78
##
## $R0
## [1] 0.8257143
##
## $Risk.ratio
## [1] 0.9446367
##
## $ConfidenceIntervals.95
## [1] 0.8776359 1.0167524
##
## [[6]]
##
## Fisher's Exact Test for Count Data
##
## data: x
## p-value = 0.154
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.504886 1.106634
## sample estimates:
## odds ratio
## 0.7486641
Problem 2
# Read in data
drugrx.df <- read.csv('~/R_Workspace/data-master/jp-drugrx-p2.txt')
# Print full table
drugstotal<- xtabs(~Recovered + Drug, data = drugrx.df)
drugstotal
## Drug
## Recovered No Yes
## No 61 77
## Yes 289 273
# Create Stratified Male and Female 2x2s
drugs<-xtabs( ~ Recovered + Drug + Gender, data = drugrx.df)
males<-drugs[,,1]
males
## Drug
## Recovered No Yes
## No 36 6
## Yes 234 81
females<-drugs[,,2]
females
## Drug
## Recovered No Yes
## No 25 71
## Yes 55 192
# Need to switch yes and no columns and rows so can use riskfunction correctly, so create tablefunction to do this
tablefunction <- function(y) {
table <- matrix(c(y[2,2], y[1,2], y[2,2]+y[1,2], y[2,1],y[1,1], y[2,1]+y[1,1]), nrow = 3)
table <- cbind(table, rowSums(table))
dimnames(table) <- list(Recovered = c('Yes', 'No', 'Total'), Drug = c('Yes', 'No', 'Total'))
return(table)
}
fixed2x2_drugstotal<-tablefunction(drugstotal)
fixed2x2_drugstotal
## Drug
## Recovered Yes No Total
## Yes 273 289 562
## No 77 61 138
## Total 350 350 700
fixed2x2_males<-tablefunction(males)
fixed2x2_males
## Drug
## Recovered Yes No Total
## Yes 81 234 315
## No 6 36 42
## Total 87 270 357
fixed2x2_females<-tablefunction(females)
fixed2x2_females
## Drug
## Recovered Yes No Total
## Yes 192 55 247
## No 71 25 96
## Total 263 80 343
riskfunction(fixed2x2_drugstotal)
## Warning in rbind(x, c(N1, N0)): number of columns of result is not a
## multiple of vector length (arg 2)
## Warning in cbind(rbind(x, c(N1, N0)), c(M1, M0, TT)): number of rows of
## result is not a multiple of vector length (arg 2)
## $Data
## Yes No Total
## Yes 273 289 562 562
## No 77 61 138 138
## Total 350 350 700 700
## 350 350 350 562
##
## $R1
## [1] 0.78
##
## $R0
## [1] 0.8257143
##
## $Risk.ratio
## [1] 0.9446367
##
## $ConfidenceIntervals.95
## [1] 0.8776359 1.0167524
##
## [[6]]
##
## Fisher's Exact Test for Count Data
##
## data: x
## p-value = 0.6799
## alternative hypothesis: two.sided
riskfunction(fixed2x2_males)
## Warning in rbind(x, c(N1, N0)): number of columns of result is not a
## multiple of vector length (arg 2)
## Warning in rbind(x, c(N1, N0)): number of rows of result is not a multiple
## of vector length (arg 2)
## $Data
## Yes No Total
## Yes 81 234 315 315
## No 6 36 42 42
## Total 87 270 357 357
## 87 270 87 315
##
## $R1
## [1] 0.9310345
##
## $R0
## [1] 0.8666667
##
## $Risk.ratio
## [1] 1.074271
##
## $ConfidenceIntervals.95
## [1] 0.9977554 1.1566534
##
## [[6]]
##
## Fisher's Exact Test for Count Data
##
## data: x
## p-value = 0.6322
## alternative hypothesis: two.sided
riskfunction(fixed2x2_females)
## Warning in rbind(x, c(N1, N0)): number of columns of result is not a
## multiple of vector length (arg 2)
## Warning in rbind(x, c(N1, N0)): number of rows of result is not a multiple
## of vector length (arg 2)
## $Data
## Yes No Total
## Yes 192 55 247 247
## No 71 25 96 96
## Total 263 80 343 343
## 263 80 263 247
##
## $R1
## [1] 0.730038
##
## $R0
## [1] 0.6875
##
## $Risk.ratio
## [1] 1.061873
##
## $ConfidenceIntervals.95
## [1] 0.9003483 1.2523768
##
## [[6]]
##
## Fisher's Exact Test for Count Data
##
## data: x
## p-value = 0.9644
## alternative hypothesis: two.sided
Answer: Overall the drug is actually slightly worse in terms of recovery than no drug (RR: 0.9446367). When we break it down by gender, males show a slightly higher recovery rate with the drug than without (RR: 1.074271). Similarly women also showed a slightly higher recovery rate while on the drug then without (RR: 1.061873). For all three tables the 95% Confidence Intervals include 1 indicating that there was no significant increase in recovery when using the drug.
Problem 3
oswego2 <- read.csv("~/R_Workspace/data-master/oswego/oswego2.txt",
strip.white = TRUE)
food.item <- c("Baked.ham", "Spinach", "Mashed.potato",
"Cabbage.salad", "Jello", "Rolls", "Brown.bread", "Milk",
"Coffee", "Water", "Cakes", "Vanilla.ice.cream", "Chocolate.ice.cream",
"Fruit.salad")
names(oswego2)[9:22] <- food.item
str(oswego2)
## 'data.frame': 75 obs. of 22 variables:
## $ ID : int 2 3 4 6 7 8 9 10 14 16 ...
## $ Age : int 52 65 59 63 70 40 15 33 10 32 ...
## $ Sex : Factor w/ 2 levels "F","M": 1 2 1 1 2 1 1 1 2 1 ...
## $ MealDate : Factor w/ 1 level "04/18/1940": 1 1 1 1 1 1 1 1 1 1 ...
## $ MealTime : Factor w/ 6 levels "11:00:00","18:30:00",..: 5 2 2 4 4 4 6 3 4 NA ...
## $ Ill : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 2 2 2 2 ...
## $ OnsetDate : Factor w/ 2 levels "4/18/1940","4/19/1940": 2 2 2 1 1 2 2 1 2 2 ...
## $ OnsetTime : Factor w/ 17 levels "00:00:00","00:30:00",..: 2 2 2 15 15 4 3 16 4 7 ...
## $ Baked.ham : Factor w/ 2 levels "N","Y": 2 2 2 2 2 1 1 2 1 2 ...
## $ Spinach : Factor w/ 2 levels "N","Y": 2 2 2 2 2 1 1 2 1 2 ...
## $ Mashed.potato : Factor w/ 2 levels "N","Y": 2 2 1 1 2 1 1 2 1 1 ...
## $ Cabbage.salad : Factor w/ 2 levels "N","Y": 1 2 1 2 1 1 1 1 1 1 ...
## $ Jello : Factor w/ 2 levels "N","Y": 1 1 1 2 2 1 1 1 1 1 ...
## $ Rolls : Factor w/ 2 levels "N","Y": 2 1 1 1 2 1 1 2 1 2 ...
## $ Brown.bread : Factor w/ 2 levels "N","Y": 1 1 1 1 2 1 1 2 1 1 ...
## $ Milk : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
## $ Coffee : Factor w/ 2 levels "N","Y": 2 2 2 1 2 1 1 1 1 2 ...
## $ Water : Factor w/ 2 levels "N","Y": 1 1 1 2 2 1 1 2 1 1 ...
## $ Cakes : Factor w/ 2 levels "N","Y": 1 1 2 1 1 1 2 1 1 2 ...
## $ Vanilla.ice.cream : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 1 2 2 2 ...
## $ Chocolate.ice.cream: Factor w/ 2 levels "N","Y": 1 2 2 1 1 2 2 2 2 2 ...
## $ Fruit.salad : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
epitable <- function(exposure, outcome, ...){
#### Updated 2016-10-16
#### 2x2 table from 'xtabs' function
#### row names are exposure status
#### col names are illness outcome
#### ... = options to fisher.test functions
x <- xtabs(~ exposure + outcome, na.action=na.pass)
rowtot <- apply(x, 1, sum)
rowdist <- sweep(x, 1, rowtot, '/')
risk0 <- rowdist['N','Y']
risk1 <- rowdist['Y','Y']
rr <- risk1/risk0
or <- (risk1/(1-risk1)) / (risk0/(1-risk0))
ft <- fisher.test(x, ...)
pv <- ft$p.value
results <- unname(round(c(risk0, risk1, rr, or, pv), 2))
names(results) <- c('R0', 'R1', 'RR', 'OR', 'p.value')
list(data = x, results = results)
}
# Create table with results
OswegoTest<-NULL
i=9
while(i<=22){
OswegoTest<-rbind(OswegoTest,epitable(oswego2[,i], oswego2$Ill)$results)
i<-i+1
}
OswegoTest<-cbind(food.item,OswegoTest)
OswegoTest
## food.item R0 R1 RR OR p.value
## [1,] "Baked.ham" "0.59" "0.63" "1.08" "1.2" "0.81"
## [2,] "Spinach" "0.62" "0.6" "0.97" "0.92" "1"
## [3,] "Mashed.potato" "0.62" "0.62" "1" "1" "1"
## [4,] "Cabbage.salad" "0.6" "0.64" "1.08" "1.22" "0.81"
## [5,] "Jello" "0.58" "0.7" "1.21" "1.68" "0.44"
## [6,] "Rolls" "0.66" "0.57" "0.86" "0.68" "0.48"
## [7,] "Brown.bread" "0.58" "0.67" "1.14" "1.43" "0.62"
## [8,] "Milk" "0.62" "0.5" "0.81" "0.61" "0.64"
## [9,] "Coffee" "0.61" "0.61" "1" "1" "1"
## [10,] "Water" "0.65" "0.54" "0.84" "0.64" "0.45"
## [11,] "Cakes" "0.54" "0.68" "1.24" "1.75" "0.34"
## [12,] "Vanilla.ice.cream" "0.14" "0.8" "5.57" "23.45" "0"
## [13,] "Chocolate.ice.cream" "0.74" "0.53" "0.72" "0.4" "0.09"
## [14,] "Fruit.salad" "0.61" "0.67" "1.1" "1.29" "1"
Vanilla Ice Cream appears to be the most likely candidate. The RR is 5.57, the OR is 23.45, and the p-value is less than 0.05. Both RR and OR are much higher for vanilla ice cream than any other food item.
Problem 4
OswegoTestAutomated <- matrix(NA, length(food.item), 5)
rownames(OswegoTestAutomated) <- food.item
colnames(OswegoTestAutomated) <- c( 'R.0' , 'R.1' , 'RR' , 'OR' , 'P value' )
for(i in 1:length(food.item)) {
OswegoTestAutomated[i,] <- epitable(oswego2[,food.item[i]], oswego2$Ill)$results
}
OswegoTestAutomated
## R.0 R.1 RR OR P value
## Baked.ham 0.59 0.63 1.08 1.20 0.81
## Spinach 0.62 0.60 0.97 0.92 1.00
## Mashed.potato 0.62 0.62 1.00 1.00 1.00
## Cabbage.salad 0.60 0.64 1.08 1.22 0.81
## Jello 0.58 0.70 1.21 1.68 0.44
## Rolls 0.66 0.57 0.86 0.68 0.48
## Brown.bread 0.58 0.67 1.14 1.43 0.62
## Milk 0.62 0.50 0.81 0.61 0.64
## Coffee 0.61 0.61 1.00 1.00 1.00
## Water 0.65 0.54 0.84 0.64 0.45
## Cakes 0.54 0.68 1.24 1.75 0.34
## Vanilla.ice.cream 0.14 0.80 5.57 23.45 0.00
## Chocolate.ice.cream 0.74 0.53 0.72 0.40 0.09
## Fruit.salad 0.61 0.67 1.10 1.29 1.00
Problem 5
lapply(oswego2[,food.item], function(x) epitable(x, oswego2$Ill)$results)
## $Baked.ham
## R0 R1 RR OR p.value
## 0.59 0.63 1.08 1.20 0.81
##
## $Spinach
## R0 R1 RR OR p.value
## 0.62 0.60 0.97 0.92 1.00
##
## $Mashed.potato
## R0 R1 RR OR p.value
## 0.62 0.62 1.00 1.00 1.00
##
## $Cabbage.salad
## R0 R1 RR OR p.value
## 0.60 0.64 1.08 1.22 0.81
##
## $Jello
## R0 R1 RR OR p.value
## 0.58 0.70 1.21 1.68 0.44
##
## $Rolls
## R0 R1 RR OR p.value
## 0.66 0.57 0.86 0.68 0.48
##
## $Brown.bread
## R0 R1 RR OR p.value
## 0.58 0.67 1.14 1.43 0.62
##
## $Milk
## R0 R1 RR OR p.value
## 0.62 0.50 0.81 0.61 0.64
##
## $Coffee
## R0 R1 RR OR p.value
## 0.61 0.61 1.00 1.00 1.00
##
## $Water
## R0 R1 RR OR p.value
## 0.65 0.54 0.84 0.64 0.45
##
## $Cakes
## R0 R1 RR OR p.value
## 0.54 0.68 1.24 1.75 0.34
##
## $Vanilla.ice.cream
## R0 R1 RR OR p.value
## 0.14 0.80 5.57 23.45 0.00
##
## $Chocolate.ice.cream
## R0 R1 RR OR p.value
## 0.74 0.53 0.72 0.40 0.09
##
## $Fruit.salad
## R0 R1 RR OR p.value
## 0.61 0.67 1.10 1.29 1.00
t(sapply(oswego2[,food.item], function(x) epitable(x, oswego2$Ill)$results))
## R0 R1 RR OR p.value
## Baked.ham 0.59 0.63 1.08 1.20 0.81
## Spinach 0.62 0.60 0.97 0.92 1.00
## Mashed.potato 0.62 0.62 1.00 1.00 1.00
## Cabbage.salad 0.60 0.64 1.08 1.22 0.81
## Jello 0.58 0.70 1.21 1.68 0.44
## Rolls 0.66 0.57 0.86 0.68 0.48
## Brown.bread 0.58 0.67 1.14 1.43 0.62
## Milk 0.62 0.50 0.81 0.61 0.64
## Coffee 0.61 0.61 1.00 1.00 1.00
## Water 0.65 0.54 0.84 0.64 0.45
## Cakes 0.54 0.68 1.24 1.75 0.34
## Vanilla.ice.cream 0.14 0.80 5.57 23.45 0.00
## Chocolate.ice.cream 0.74 0.53 0.72 0.40 0.09
## Fruit.salad 0.61 0.67 1.10 1.29 1.00
Answer: Both lapply and sapply take each food item from the oswego2 data and use the epitable function to create the results and then put them into some sort of combined format. The difference is that lapply puts the results into a list while sapply puts them into a matrix/table.
Problem 6
a = 1
b = 2
f <- function(x) {
a * x + b
}
g <- function(x) {
a = 2
b = 1
f(x)
}
g(2)
## [1] 4
This is called a scope. A and b are set as global variables, where a = 1 and b = 2. However, because g() calls up f(), even though g() changes a and b to different numbers (a=2 , b=1), when g() calls f() the values for a and b reset to the global values of a=1 and b=2. Hence we get 4 (1x2+2=4).
Problem 7
g <- function(x) {
a = 2
b = 1
f <- function(x) {
a * x + b
}
f(x)
}
g(2)
## [1] 5
This results in a differnt answer because in this case the f() function is part of the g() function, so when g() changes the values of a and b they actually carry over to f() unlike the previous problem. Thus we get 5 (2x2+1=5).