Problem 1
# Print the current working directory.
getwd()
## [1] "C:/Users/marss/Documents/ph251d-homework"
Problem 2
# Build the 2x2 table of vital status (rows) by smoking status (columns).
# matrix() fills column by column: the "Yes" column first, then "No".
table227 <- matrix(
  c(139, 443, 230, 502),
  nrow = 2,
  ncol = 2,
  dimnames = list(
    "Vital Status" = c("Dead", "Alive"),
    "Smoking" = c("Yes", "No")
  )
)
table227
## Smoking
## Vital Status Yes No
## Dead 139 230
## Alive 443 502
Problem 3
# Append a "Total" row of column sums, then a "Total" column of row sums.
# The row sums are taken after the Total row is added, so the bottom-right
# cell holds the grand total.
coltot <- colSums(table227)
table228 <- rbind(table227, Total = coltot)
rtot <- rowSums(table228)
table228 <- cbind(table228, Total = rtot)
table228
## Yes No Total
## Dead 139 230 369
## Alive 443 502 945
## Total 582 732 1314
Problem 4
Row, Column, and Joint Distributions
# Row distribution: each cell divided by its row total, so each row sums to 1.
rsum <- rowSums(table227)
rdist <- prop.table(table227, margin = 1)
rdist
## Smoking
## Vital Status Yes No
## Dead 0.3766938 0.6233062
## Alive 0.4687831 0.5312169
# Column distribution: each cell divided by its column total, so each column
# sums to 1.
csum <- colSums(table227)
cdist <- prop.table(table227, margin = 2)
cdist
## Smoking
## Vital Status Yes No
## Dead 0.2388316 0.3142077
## Alive 0.7611684 0.6857923
# Joint distribution: each cell divided by the grand total, so all four cells
# sum to 1.
total <- sum(table227)
jdist <- prop.table(table227)
jdist
## Smoking
## Vital Status Yes No
## Dead 0.1057839 0.1750381
## Alive 0.3371385 0.3820396
Problem 5
# Risk of death within each smoking group: deaths divided by the column total.
risks <- table227["Dead", ] / csum
# Non-smokers ("No") are the reference group for both ratios.
risk.ratio <- risks / risks["No"]
odds <- risks / (1 - risks)
odds.ratio <- odds / odds["No"]
rbind(risks, risk.ratio, odds, odds.ratio)
## Yes No
## risks 0.2388316 0.3142077
## risk.ratio 0.7601076 1.0000000
## odds 0.3137698 0.4581673
## odds.ratio 0.6848366 1.0000000
Risk of death is lower among smokers than non-smokers in this study.
Problem 6
# Load the Whickham data set from mosaicData and keep a working copy in wdat.
library(mosaicData)
data("Whickham", package = "mosaicData")
wdat <- Whickham
# Inspect the structure (variables and their types) of the working copy.
str(wdat)
## 'data.frame': 1314 obs. of 3 variables:
## $ outcome: Factor w/ 2 levels "Alive","Dead": 1 1 2 1 1 1 1 2 1 1 ...
## $ smoker : Factor w/ 2 levels "No","Yes": 2 2 2 1 1 2 2 1 1 1 ...
## $ age : int 23 18 71 67 64 38 45 76 28 27 ...
# Bin age into four left-closed intervals: [15,25), [25,45), [45,65), [65,100).
wdat[["agecat4"]] <- cut(
  wdat[["age"]],
  breaks = c(15, 25, 45, 65, 100),
  right = FALSE
)
# Count the observations falling in each age category.
table(wdat[["agecat4"]])
##
## [15,25) [25,45) [45,65) [65,100)
## 127 497 447 243
2 Way Contingency Table
# Cross-tabulate vital status (rows) by smoking status (columns) and print the
# table with row and column totals added.
two_way_table <- xtabs(~ outcome + smoker, data = wdat)
addmargins(two_way_table)
## smoker
## outcome No Yes Sum
## Alive 502 443 945
## Dead 230 139 369
## Sum 732 582 1314
# Column totals (per smoking group) and row totals (per vital status).
csum2WAY <- apply(two_way_table, 2, sum)
rsum2WAY <- apply(two_way_table, 1, sum)
# Risk of death within each smoking group: deaths divided by the column total.
risks2 <- two_way_table["Dead", ] / csum2WAY
# Index the reference group by name rather than position. The smoker columns
# here are ordered No, Yes, so a positional [2] would make smokers the
# reference and flip the ratios relative to Problem 5, where non-smokers are
# the reference.
risk.ratio2 <- risks2 / risks2["No"]
odds2 <- risks2 / (1 - risks2)
odds.ratio2 <- odds2 / odds2["No"]
rbind(risks2, risk.ratio2, odds2, odds.ratio2)
## No Yes
## risks2 0.3142077 0.2388316
## risk.ratio2 1.0000000 0.7601076
## odds2 0.4581673 0.3137698
## odds.ratio2 1.0000000 0.6848366
Smokers had a lower risk of death than non-smokers.
3 Way Contingency Table
# Stratify the outcome-by-smoker table by age category. The formula refers to
# agecat4 by its bare column name so that xtabs() looks it up in `data` and
# labels the third dimension "agecat4" instead of "wdat$agecat4".
three_way_table <- xtabs(~ outcome + smoker + agecat4, data = wdat)
addmargins(three_way_table)
## , , agecat4 = [15,25)
##
## smoker
## outcome No Yes Sum
## Alive 71 53 124
## Dead 1 2 3
## Sum 72 55 127
##
## , , agecat4 = [25,45)
##
## smoker
## outcome No Yes Sum
## Alive 256 217 473
## Dead 11 13 24
## Sum 267 230 497
##
## , , agecat4 = [45,65)
##
## smoker
## outcome No Yes Sum
## Alive 147 167 314
## Dead 53 80 133
## Sum 200 247 447
##
## , , agecat4 = [65,100)
##
## smoker
## outcome No Yes Sum
## Alive 28 6 34
## Dead 165 44 209
## Sum 193 50 243
##
## , , agecat4 = Sum
##
## smoker
## outcome No Yes Sum
## Alive 502 443 945
## Dead 230 139 369
## Sum 732 582 1314
# Proportion alive/dead within each smoker-by-age stratum: every cell is
# divided by the total of its (smoker, agecat4) column, so the "Dead" rows are
# the stratum-specific risks of death.
prop.table(three_way_table, margin = c(2, 3))
## , , agecat4 = [15,25)
##
## smoker
## outcome No Yes
## Alive 0.98611111 0.96363636
## Dead 0.01388889 0.03636364
##
## , , agecat4 = [25,45)
##
## smoker
## outcome No Yes
## Alive 0.95880150 0.94347826
## Dead 0.04119850 0.05652174
##
## , , agecat4 = [45,65)
##
## smoker
## outcome No Yes
## Alive 0.73500000 0.67611336
## Dead 0.26500000 0.32388664
##
## , , agecat4 = [65,100)
##
## smoker
## outcome No Yes
## Alive 0.14507772 0.12000000
## Dead 0.85492228 0.88000000
Risk of death increases with age. This also appears to be an example of Simpson’s paradox: when the risks are stratified by age, smokers have a consistently higher risk of death in every age group, whereas in the aggregated table smokers had a lower risk of death. This is probably because the third variable, age, was acting as a lurking (confounding) variable: non-smokers make up most of the oldest age group (193 of 243), where the risk of death is highest regardless of smoking status.