Problem 1

# Creating function
riskfunction<- function(x){
  a = x[1, 1]
  b = x[1, 2]
  c = x[2, 1]
  d = x[2, 2]
  N1<-a+c
  N0<-b+d
  M1<-a+b
  M0<-c+d
  TT<-a+b+c+d
  R1<-a/N1
  R0<-b/N0
  RR<-R1/R0
  SE.lnRR<-sqrt(1/a-1/N1+1/b-1/N0)
  Z <- qnorm((1 +0.95)/2)
  CI<-exp(log(RR)+c(-1,1)*Z*SE.lnRR)
  fisher<- fisher.test(x, workspace=2e+07)
  list(
    Data = cbind(rbind(x,c(N1,N0)),c(M1,M0,TT)),
    R1=R1,
    R0=R0,
    Risk.ratio =RR,
    ConfidenceIntervals.95 = CI,
    FisherTest<-fisher
  )
}

table_5.3<-rbind(c(273,289),c(77,61))
 
riskfunction(table_5.3)
## $Data
##      [,1] [,2] [,3]
## [1,]  273  289  562
## [2,]   77   61  138
## [3,]  350  350  700
## 
## $R1
## [1] 0.78
## 
## $R0
## [1] 0.8257143
## 
## $Risk.ratio
## [1] 0.9446367
## 
## $ConfidenceIntervals.95
## [1] 0.8776359 1.0167524
## 
## [[6]]
## 
##  Fisher's Exact Test for Count Data
## 
## data:  x
## p-value = 0.154
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.504886 1.106634
## sample estimates:
## odds ratio 
##  0.7486641

Problem 2

# Read in data
drugrx.df <- read.csv('~/R_Workspace/data-master/jp-drugrx-p2.txt')

# Print full table
drugstotal<- xtabs(~Recovered + Drug,  data = drugrx.df)
drugstotal
##          Drug
## Recovered  No Yes
##       No   61  77
##       Yes 289 273
# Create Stratified Male and Female 2x2s
drugs<-xtabs( ~ Recovered + Drug + Gender, data = drugrx.df)
males<-drugs[,,1]
males
##          Drug
## Recovered  No Yes
##       No   36   6
##       Yes 234  81
females<-drugs[,,2]
females
##          Drug
## Recovered  No Yes
##       No   25  71
##       Yes  55 192
# Need to switch yes and no columns and rows so can use riskfunction correctly, so create tablefunction to do this

tablefunction <- function(y) {
 table <- matrix(c(y[2,2], y[1,2], y[2,2]+y[1,2], y[2,1],y[1,1], y[2,1]+y[1,1]), nrow = 3)
  table <- cbind(table, rowSums(table))
  dimnames(table) <- list(Recovered = c('Yes', 'No', 'Total'), Drug = c('Yes', 'No', 'Total'))
  
  return(table)
}

fixed2x2_drugstotal<-tablefunction(drugstotal)
fixed2x2_drugstotal
##          Drug
## Recovered Yes  No Total
##     Yes   273 289   562
##     No     77  61   138
##     Total 350 350   700
fixed2x2_males<-tablefunction(males)
fixed2x2_males
##          Drug
## Recovered Yes  No Total
##     Yes    81 234   315
##     No      6  36    42
##     Total  87 270   357
fixed2x2_females<-tablefunction(females)
fixed2x2_females
##          Drug
## Recovered Yes No Total
##     Yes   192 55   247
##     No     71 25    96
##     Total 263 80   343
riskfunction(fixed2x2_drugstotal)
## Warning in rbind(x, c(N1, N0)): number of columns of result is not a
## multiple of vector length (arg 2)
## Warning in cbind(rbind(x, c(N1, N0)), c(M1, M0, TT)): number of rows of
## result is not a multiple of vector length (arg 2)
## $Data
##       Yes  No Total    
## Yes   273 289   562 562
## No     77  61   138 138
## Total 350 350   700 700
##       350 350   350 562
## 
## $R1
## [1] 0.78
## 
## $R0
## [1] 0.8257143
## 
## $Risk.ratio
## [1] 0.9446367
## 
## $ConfidenceIntervals.95
## [1] 0.8776359 1.0167524
## 
## [[6]]
## 
##  Fisher's Exact Test for Count Data
## 
## data:  x
## p-value = 0.6799
## alternative hypothesis: two.sided
riskfunction(fixed2x2_males)
## Warning in rbind(x, c(N1, N0)): number of columns of result is not a
## multiple of vector length (arg 2)

## Warning in rbind(x, c(N1, N0)): number of rows of result is not a multiple
## of vector length (arg 2)
## $Data
##       Yes  No Total    
## Yes    81 234   315 315
## No      6  36    42  42
## Total  87 270   357 357
##        87 270    87 315
## 
## $R1
## [1] 0.9310345
## 
## $R0
## [1] 0.8666667
## 
## $Risk.ratio
## [1] 1.074271
## 
## $ConfidenceIntervals.95
## [1] 0.9977554 1.1566534
## 
## [[6]]
## 
##  Fisher's Exact Test for Count Data
## 
## data:  x
## p-value = 0.6322
## alternative hypothesis: two.sided
riskfunction(fixed2x2_females)
## Warning in rbind(x, c(N1, N0)): number of columns of result is not a
## multiple of vector length (arg 2)

## Warning in rbind(x, c(N1, N0)): number of rows of result is not a multiple
## of vector length (arg 2)
## $Data
##       Yes No Total    
## Yes   192 55   247 247
## No     71 25    96  96
## Total 263 80   343 343
##       263 80   263 247
## 
## $R1
## [1] 0.730038
## 
## $R0
## [1] 0.6875
## 
## $Risk.ratio
## [1] 1.061873
## 
## $ConfidenceIntervals.95
## [1] 0.9003483 1.2523768
## 
## [[6]]
## 
##  Fisher's Exact Test for Count Data
## 
## data:  x
## p-value = 0.9644
## alternative hypothesis: two.sided

Answer: Overall the drug is actually slightly worse in terms of recovery than no drug (RR: 0.9446367). When we break it down by gender, males show a slightly higher recovery rate with the drug than without (RR: 1.074271). Similarly women also showed a slightly higher recovery rate while on the drug then without (RR: 1.061873). For all three tables the 95% Confidence Intervals include 1 indicating that there was no significant increase in recovery when using the drug.

Problem 3

oswego2 <- read.csv("~/R_Workspace/data-master/oswego/oswego2.txt", 
  strip.white = TRUE)
food.item <- c("Baked.ham", "Spinach", "Mashed.potato", 
  "Cabbage.salad", "Jello", "Rolls", "Brown.bread", "Milk", 
  "Coffee", "Water", "Cakes", "Vanilla.ice.cream", "Chocolate.ice.cream", 
  "Fruit.salad")
names(oswego2)[9:22] <- food.item
str(oswego2)
## 'data.frame':    75 obs. of  22 variables:
##  $ ID                 : int  2 3 4 6 7 8 9 10 14 16 ...
##  $ Age                : int  52 65 59 63 70 40 15 33 10 32 ...
##  $ Sex                : Factor w/ 2 levels "F","M": 1 2 1 1 2 1 1 1 2 1 ...
##  $ MealDate           : Factor w/ 1 level "04/18/1940": 1 1 1 1 1 1 1 1 1 1 ...
##  $ MealTime           : Factor w/ 6 levels "11:00:00","18:30:00",..: 5 2 2 4 4 4 6 3 4 NA ...
##  $ Ill                : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 2 2 2 2 ...
##  $ OnsetDate          : Factor w/ 2 levels "4/18/1940","4/19/1940": 2 2 2 1 1 2 2 1 2 2 ...
##  $ OnsetTime          : Factor w/ 17 levels "00:00:00","00:30:00",..: 2 2 2 15 15 4 3 16 4 7 ...
##  $ Baked.ham          : Factor w/ 2 levels "N","Y": 2 2 2 2 2 1 1 2 1 2 ...
##  $ Spinach            : Factor w/ 2 levels "N","Y": 2 2 2 2 2 1 1 2 1 2 ...
##  $ Mashed.potato      : Factor w/ 2 levels "N","Y": 2 2 1 1 2 1 1 2 1 1 ...
##  $ Cabbage.salad      : Factor w/ 2 levels "N","Y": 1 2 1 2 1 1 1 1 1 1 ...
##  $ Jello              : Factor w/ 2 levels "N","Y": 1 1 1 2 2 1 1 1 1 1 ...
##  $ Rolls              : Factor w/ 2 levels "N","Y": 2 1 1 1 2 1 1 2 1 2 ...
##  $ Brown.bread        : Factor w/ 2 levels "N","Y": 1 1 1 1 2 1 1 2 1 1 ...
##  $ Milk               : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Coffee             : Factor w/ 2 levels "N","Y": 2 2 2 1 2 1 1 1 1 2 ...
##  $ Water              : Factor w/ 2 levels "N","Y": 1 1 1 2 2 1 1 2 1 1 ...
##  $ Cakes              : Factor w/ 2 levels "N","Y": 1 1 2 1 1 1 2 1 1 2 ...
##  $ Vanilla.ice.cream  : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 1 2 2 2 ...
##  $ Chocolate.ice.cream: Factor w/ 2 levels "N","Y": 1 2 2 1 1 2 2 2 2 2 ...
##  $ Fruit.salad        : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
epitable <- function(exposure, outcome, ...){
  #### Updated 2016-10-16
  #### 2x2 table from 'xtabs' function
  #### row names are exposure status
  #### col names are illness outcome
  #### ... = options to fisher.test functions
  x <- xtabs(~ exposure + outcome, na.action=na.pass)
  rowtot <- apply(x, 1, sum)
  rowdist <- sweep(x, 1, rowtot, '/')
  risk0 <- rowdist['N','Y']
  risk1 <- rowdist['Y','Y']
  rr <- risk1/risk0
  or <- (risk1/(1-risk1)) / (risk0/(1-risk0))
  ft <- fisher.test(x, ...)
  pv <- ft$p.value
  results <- unname(round(c(risk0, risk1, rr, or, pv), 2))
  names(results) <- c('R0', 'R1', 'RR', 'OR', 'p.value')
  list(data = x, results = results)
}
# Create table with results
OswegoTest<-NULL
i=9
while(i<=22){
  OswegoTest<-rbind(OswegoTest,epitable(oswego2[,i], oswego2$Ill)$results)
  i<-i+1
}

OswegoTest<-cbind(food.item,OswegoTest)
OswegoTest
##       food.item             R0     R1     RR     OR      p.value
##  [1,] "Baked.ham"           "0.59" "0.63" "1.08" "1.2"   "0.81" 
##  [2,] "Spinach"             "0.62" "0.6"  "0.97" "0.92"  "1"    
##  [3,] "Mashed.potato"       "0.62" "0.62" "1"    "1"     "1"    
##  [4,] "Cabbage.salad"       "0.6"  "0.64" "1.08" "1.22"  "0.81" 
##  [5,] "Jello"               "0.58" "0.7"  "1.21" "1.68"  "0.44" 
##  [6,] "Rolls"               "0.66" "0.57" "0.86" "0.68"  "0.48" 
##  [7,] "Brown.bread"         "0.58" "0.67" "1.14" "1.43"  "0.62" 
##  [8,] "Milk"                "0.62" "0.5"  "0.81" "0.61"  "0.64" 
##  [9,] "Coffee"              "0.61" "0.61" "1"    "1"     "1"    
## [10,] "Water"               "0.65" "0.54" "0.84" "0.64"  "0.45" 
## [11,] "Cakes"               "0.54" "0.68" "1.24" "1.75"  "0.34" 
## [12,] "Vanilla.ice.cream"   "0.14" "0.8"  "5.57" "23.45" "0"    
## [13,] "Chocolate.ice.cream" "0.74" "0.53" "0.72" "0.4"   "0.09" 
## [14,] "Fruit.salad"         "0.61" "0.67" "1.1"  "1.29"  "1"

Vanilla Ice Cream appears to be the most likely candidate. The RR is 5.57, the OR is 23.45, and the p-value is less than 0.05. Both RR and OR are much higher for vanilla ice cream than any other food item.

Problem 4

OswegoTestAutomated <- matrix(NA, length(food.item), 5) 
rownames(OswegoTestAutomated) <- food.item
colnames(OswegoTestAutomated) <- c( 'R.0' , 'R.1' , 'RR' , 'OR' , 'P value' ) 
for(i in 1:length(food.item)) {
  OswegoTestAutomated[i,] <- epitable(oswego2[,food.item[i]], oswego2$Ill)$results 
}

OswegoTestAutomated
##                      R.0  R.1   RR    OR P value
## Baked.ham           0.59 0.63 1.08  1.20    0.81
## Spinach             0.62 0.60 0.97  0.92    1.00
## Mashed.potato       0.62 0.62 1.00  1.00    1.00
## Cabbage.salad       0.60 0.64 1.08  1.22    0.81
## Jello               0.58 0.70 1.21  1.68    0.44
## Rolls               0.66 0.57 0.86  0.68    0.48
## Brown.bread         0.58 0.67 1.14  1.43    0.62
## Milk                0.62 0.50 0.81  0.61    0.64
## Coffee              0.61 0.61 1.00  1.00    1.00
## Water               0.65 0.54 0.84  0.64    0.45
## Cakes               0.54 0.68 1.24  1.75    0.34
## Vanilla.ice.cream   0.14 0.80 5.57 23.45    0.00
## Chocolate.ice.cream 0.74 0.53 0.72  0.40    0.09
## Fruit.salad         0.61 0.67 1.10  1.29    1.00

Problem 5

lapply(oswego2[,food.item], function(x) epitable(x, oswego2$Ill)$results)
## $Baked.ham
##      R0      R1      RR      OR p.value 
##    0.59    0.63    1.08    1.20    0.81 
## 
## $Spinach
##      R0      R1      RR      OR p.value 
##    0.62    0.60    0.97    0.92    1.00 
## 
## $Mashed.potato
##      R0      R1      RR      OR p.value 
##    0.62    0.62    1.00    1.00    1.00 
## 
## $Cabbage.salad
##      R0      R1      RR      OR p.value 
##    0.60    0.64    1.08    1.22    0.81 
## 
## $Jello
##      R0      R1      RR      OR p.value 
##    0.58    0.70    1.21    1.68    0.44 
## 
## $Rolls
##      R0      R1      RR      OR p.value 
##    0.66    0.57    0.86    0.68    0.48 
## 
## $Brown.bread
##      R0      R1      RR      OR p.value 
##    0.58    0.67    1.14    1.43    0.62 
## 
## $Milk
##      R0      R1      RR      OR p.value 
##    0.62    0.50    0.81    0.61    0.64 
## 
## $Coffee
##      R0      R1      RR      OR p.value 
##    0.61    0.61    1.00    1.00    1.00 
## 
## $Water
##      R0      R1      RR      OR p.value 
##    0.65    0.54    0.84    0.64    0.45 
## 
## $Cakes
##      R0      R1      RR      OR p.value 
##    0.54    0.68    1.24    1.75    0.34 
## 
## $Vanilla.ice.cream
##      R0      R1      RR      OR p.value 
##    0.14    0.80    5.57   23.45    0.00 
## 
## $Chocolate.ice.cream
##      R0      R1      RR      OR p.value 
##    0.74    0.53    0.72    0.40    0.09 
## 
## $Fruit.salad
##      R0      R1      RR      OR p.value 
##    0.61    0.67    1.10    1.29    1.00
t(sapply(oswego2[,food.item], function(x) epitable(x, oswego2$Ill)$results))
##                       R0   R1   RR    OR p.value
## Baked.ham           0.59 0.63 1.08  1.20    0.81
## Spinach             0.62 0.60 0.97  0.92    1.00
## Mashed.potato       0.62 0.62 1.00  1.00    1.00
## Cabbage.salad       0.60 0.64 1.08  1.22    0.81
## Jello               0.58 0.70 1.21  1.68    0.44
## Rolls               0.66 0.57 0.86  0.68    0.48
## Brown.bread         0.58 0.67 1.14  1.43    0.62
## Milk                0.62 0.50 0.81  0.61    0.64
## Coffee              0.61 0.61 1.00  1.00    1.00
## Water               0.65 0.54 0.84  0.64    0.45
## Cakes               0.54 0.68 1.24  1.75    0.34
## Vanilla.ice.cream   0.14 0.80 5.57 23.45    0.00
## Chocolate.ice.cream 0.74 0.53 0.72  0.40    0.09
## Fruit.salad         0.61 0.67 1.10  1.29    1.00

Answer: Both lapply and sapply take each food item from the oswego2 data and use the epitable function to create the results and then put them into some sort of combined format. The difference is that lapply puts the results into a list while sapply puts them into a matrix/table.

Problem 6

a = 1
b = 2
f <- function(x) {
  a * x + b
}
g <- function(x) {
  a = 2
  b = 1
  f(x)
}
g(2)
## [1] 4

This is called a scope. A and b are set as global variables, where a = 1 and b = 2. However, because g() calls up f(), even though g() changes a and b to different numbers (a=2 , b=1), when g() calls f() the values for a and b reset to the global values of a=1 and b=2. Hence we get 4 (1x2+2=4).

Problem 7

g <- function(x) {
  a = 2
  b = 1
  f <- function(x) {
    a * x + b
  }
  f(x)
}
g(2)
## [1] 5

This results in a differnt answer because in this case the f() function is part of the g() function, so when g() changes the values of a and b they actually carry over to f() unlike the previous problem. Thus we get 5 (2x2+1=5).