Problem 1

# Print the current working directory of the R session
getwd()
## [1] "C:/Users/marss/Documents/ph251d-homework"

Problem 2

# 2x2 table of vital status (rows) by smoking status (columns).
# The counts are supplied column by column: the "Yes" column first,
# then the "No" column.
table227 <- matrix(
  c(139, 443, 230, 502),
  nrow = 2,
  ncol = 2,
  dimnames = list(
    "Vital Status" = c("Dead", "Alive"),
    "Smoking" = c("Yes", "No")
  )
)

table227
##             Smoking
## Vital Status Yes  No
##        Dead  139 230
##        Alive 443 502

Problem 3

# Append a "Total" row holding the column sums ...
coltot <- colSums(table227)
table228 <- rbind(table227, Total = coltot)

# ... then a "Total" column holding the row sums of the extended table,
# so the bottom-right cell is the grand total.
rtot <- rowSums(table228)
table228 <- cbind(table228, Total = rtot)

table228
##       Yes  No Total
## Dead  139 230   369
## Alive 443 502   945
## Total 582 732  1314

Problem 4

Marginal Distributions

# Row distribution: each cell as a proportion of its row total
# (every row of rdist sums to 1)
rsum <- rowSums(table227)
rdist <- prop.table(table227, margin = 1)
rdist
##             Smoking
## Vital Status       Yes        No
##        Dead  0.3766938 0.6233062
##        Alive 0.4687831 0.5312169
# Column distribution: each cell as a proportion of its column total
# (every column of cdist sums to 1)
csum <- colSums(table227)
cdist <- prop.table(table227, margin = 2)
cdist
##             Smoking
## Vital Status       Yes        No
##        Dead  0.2388316 0.3142077
##        Alive 0.7611684 0.6857923
# Joint distribution: each cell as a proportion of the grand total
# (all cells of jdist sum to 1)
total <- sum(table227)
jdist <- prop.table(table227)
jdist
##             Smoking
## Vital Status       Yes        No
##        Dead  0.1057839 0.1750381
##        Alive 0.3371385 0.3820396

Problem 5

# Risk of death within each smoking group (deaths / column total).
# Ratios use non-smokers ("No") as the reference group, selected by name
# so the choice does not depend on column order.
risks <- table227["Dead", ] / csum
risk.ratio <- risks / risks["No"]
odds <- risks / (1 - risks)
odds.ratio <- odds / odds["No"]
rbind(risks, risk.ratio, odds, odds.ratio)
##                  Yes        No
## risks      0.2388316 0.3142077
## risk.ratio 0.7601076 1.0000000
## odds       0.3137698 0.4581673
## odds.ratio 0.6848366 1.0000000

Risk of death is lower among smokers than non-smokers in this study.

Problem 6

# Attach the mosaicData package
library(mosaicData)
# Load the Whickham dataset (presumably the one shipped with mosaicData)
data(Whickham)
# Work on a copy under a shorter name
wdat <- Whickham
# Inspect the structure of the data frame (column names and types)
str(wdat)
## 'data.frame':    1314 obs. of  3 variables:
##  $ outcome: Factor w/ 2 levels "Alive","Dead": 1 1 2 1 1 1 1 2 1 1 ...
##  $ smoker : Factor w/ 2 levels "No","Yes": 2 2 2 1 1 2 2 1 1 1 ...
##  $ age    : int  23 18 71 67 64 38 45 76 28 27 ...
# Bin age into four left-closed, right-open intervals:
# [15,25), [25,45), [45,65), [65,100)
wdat[["agecat4"]] <- cut(
  wdat[["age"]],
  breaks = c(15, 25, 45, 65, 100),
  right = FALSE
)
# Count of subjects in each age category
table(wdat[["agecat4"]])
## 
##  [15,25)  [25,45)  [45,65) [65,100) 
##      127      497      447      243

2 Way Contingency Table

# Cross-tabulate outcome (rows) by smoking status (columns)
two_way_table <- xtabs(~ outcome + smoker, data = wdat)

# Same table with row, column, and grand totals appended
addmargins(two_way_table)
##        smoker
## outcome   No  Yes  Sum
##   Alive  502  443  945
##   Dead   230  139  369
##   Sum    732  582 1314
# Column (per smoking group) and row (per outcome) totals of the 2-way table
csum2WAY <- apply(two_way_table, 2, sum)
rsum2WAY <- apply(two_way_table, 1, sum)

# Risk of death within each smoking group (deaths / column total).
# The factor levels sort alphabetically, so the columns here are ordered
# "No", "Yes" -- the reverse of table227. The reference group is therefore
# selected by name ("No") rather than by position, so the ratios compare
# smokers to non-smokers exactly as in Problem 5.
risks2 <- two_way_table["Dead", ] / csum2WAY
risk.ratio2 <- risks2 / risks2["No"]
odds2 <- risks2 / (1 - risks2)
odds.ratio2 <- odds2 / odds2["No"]
rbind(risks2, risk.ratio2, odds2, odds.ratio2)
##                    No       Yes
## risks2      0.3142077 0.2388316
## risk.ratio2 1.0000000 0.7601076
## odds2       0.4581673 0.3137698
## odds.ratio2 1.0000000 0.6848366

Smokers had a lower risk of death than non-smokers.

3 Way Contingency Table

# Stratify the outcome-by-smoker table by age category. agecat4 is given by
# its bare column name so it is looked up in `data` like the other terms,
# and the third dimension is labelled "agecat4" instead of "wdat$agecat4".
three_way_table <- xtabs(~ outcome + smoker + agecat4, data = wdat)

# Stratified counts with totals appended along every dimension
addmargins(three_way_table)
## , , agecat4 = [15,25)
## 
##        smoker
## outcome   No  Yes  Sum
##   Alive   71   53  124
##   Dead     1    2    3
##   Sum     72   55  127
## 
## , , agecat4 = [25,45)
## 
##        smoker
## outcome   No  Yes  Sum
##   Alive  256  217  473
##   Dead    11   13   24
##   Sum    267  230  497
## 
## , , agecat4 = [45,65)
## 
##        smoker
## outcome   No  Yes  Sum
##   Alive  147  167  314
##   Dead    53   80  133
##   Sum    200  247  447
## 
## , , agecat4 = [65,100)
## 
##        smoker
## outcome   No  Yes  Sum
##   Alive   28    6   34
##   Dead   165   44  209
##   Sum    193   50  243
## 
## , , agecat4 = Sum
## 
##        smoker
## outcome   No  Yes  Sum
##   Alive  502  443  945
##   Dead   230  139  369
##   Sum    732  582 1314
# Within each smoker-by-age stratum, the proportion alive and dead
# (each column of each slice sums to 1); the "Dead" row is the
# age-specific risk of death for that smoking group.
prop.table(three_way_table, margin = c(2, 3))
## , , agecat4 = [15,25)
## 
##        smoker
## outcome         No        Yes
##   Alive 0.98611111 0.96363636
##   Dead  0.01388889 0.03636364
## 
## , , agecat4 = [25,45)
## 
##        smoker
## outcome         No        Yes
##   Alive 0.95880150 0.94347826
##   Dead  0.04119850 0.05652174
## 
## , , agecat4 = [45,65)
## 
##        smoker
## outcome         No        Yes
##   Alive 0.73500000 0.67611336
##   Dead  0.26500000 0.32388664
## 
## , , agecat4 = [65,100)
## 
##        smoker
## outcome         No        Yes
##   Alive 0.14507772 0.12000000
##   Dead  0.85492228 0.88000000

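Risk of death increases with age. This also appears to be an example of Simpson’s paradox: when the risks are stratified by age, smokers have a consistently higher risk of death in every age group, whereas in the overall (unstratified) table smokers had a lower risk of death. This is probably because the third variable, age, was acting as a lurking (confounding) variable: non-smokers in this sample were disproportionately in the oldest age group (193 of 732 non-smokers were 65 or older, versus 50 of 582 smokers), where the risk of death is highest.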