## Exercise 2.2
## Build the 2x2 table of counts (outcome by smoking status) in three
## equivalent ways; every approach leaves the same matrix in `tab`.

# Approach 1: pass the labelled dimnames straight to matrix().
tab <- matrix(
  c(139, 443, 230, 502),
  nrow = 2,
  ncol = 2,
  dimnames = list(Outcome = c("Dead", "Alive"), Smoker = c("Yes", "No"))
)

# Approach 2: create the bare matrix, then attach a labelled dimnames list.
tab <- matrix(c(139, 443, 230, 502), nrow = 2, ncol = 2)
dimnames(tab) <- list(Outcome = c("Dead", "Alive"), Smoker = c("Yes", "No"))

# Approach 3: set row names, column names and dimension labels one by one.
tab <- matrix(c(139, 443, 230, 502), nrow = 2, ncol = 2)
rownames(tab) <- c("Dead", "Alive")
colnames(tab) <- c("Yes", "No")
names(dimnames(tab)) <- c("Outcome", "Smoker")
## Exercise 2.3
# Extend `tab` with marginal totals: first a "Total" column holding the row
# sums, then a "Total" row holding the column sums of the widened table
# (so its last entry is the grand total).
rowt <- rowSums(tab)
tab2 <- cbind(tab, Total = rowt)
colt <- colSums(tab2)
tab2 <- rbind(tab2, Total = colt)
# Label the two dimensions of the enlarged table.
names(dimnames(tab2)) <- c("Outcome", "Smoker")
## Exercise 2.4
# Marginal totals of the 2x2 table.
rowt <- rowSums(tab)
colt <- colSums(tab)

# Row distribution: each cell divided by its row total.
rowd <- prop.table(tab, margin = 1)
# Column distribution: each cell divided by its column total.
cold <- prop.table(tab, margin = 2)
# Joint distribution: each cell divided by the grand total.
jtd <- prop.table(tab)

# Collect the three distributions in one named list.
ptabs <- list(
  row.distribution = rowd,
  col.distribution = cold,
  joint.distribution = jtd
)
## Exercise 2.5
# Risk of death within each smoking group: deaths divided by the group's
# column total in tab2. Rows and columns are selected by name rather than
# by position, so the calculation does not depend on the table's layout.
risk <- tab2["Dead", c("Yes", "No")] / tab2["Total", c("Yes", "No")]
# Ratios use the "No" (non-smoker) column as the reference group.
risk.ratio <- risk / risk["No"]
odds <- risk / (1 - risk)
odds.ratio <- odds / odds["No"]
# One row per measure (row names come from the variable names passed to
# rbind), one column per smoking group.
results <- rbind(risk, risk.ratio, odds, odds.ratio)
results.rounded <- round(results, 2)
## Interpretation: The risk of death among non-smokers is higher than the risk
## of death among smokers, suggesting that there may be some confounding.
## Exercise 2.6
## Too much trouble loading.
## Interpretation: The risk of death is not larger in non-smokers; in fact, it
## is larger among smokers in the older age groups.
## Exercise 2.7
# Store the two labels, then display the 2x2 table of counts.
# NOTE(review): `Nonsmoker` is not used anywhere in the visible code --
# confirm whether it is still needed.
Nonsmoker <- c("Yes", "No")
tab
## Printed table:
##        Smoker
## Outcome Yes  No
##   Dead  139 230
##   Alive 443 502
## Exercise 2.8
# Read the homes data set from its GitHub URL into the data frame `hp`.
# as.is = TRUE requests that character columns are not converted to factors.
hp <- read.csv(
  file = "https://raw.githubusercontent.com/taragonmd/data/master/homes.csv",
  as.is = TRUE
)