##Exercise 2.2

# Build the 2 x 2 table of vital status (rows) by smoking status (columns)
# in three equivalent ways. Each variant overwrites `tab` with the same
# matrix, so only the last assignment is actually needed downstream.
# matrix() fills column-wise: the first two counts form the "Yes" column
# and the last two the "No" column.

# Variant 1: pass the named dimnames list straight to matrix().
tab <- matrix(c(139, 443, 230, 502), nrow = 2, ncol = 2,
              dimnames = list(Outcome = c("Dead", "Alive"),
                              Smoker = c("Yes", "No")))

# Equivalent variant 2: create the bare matrix, then attach the dimnames.
tab <- matrix(c(139, 443, 230, 502), 2, 2)
dimnames(tab) <- list(Outcome = c("Dead", "Alive"),
                      Smoker = c("Yes", "No"))

# Equivalent variant 3: set row names, column names, and the names of the
# two dimensions in separate steps.
tab <- matrix(c(139, 443, 230, 502), 2, 2)
rownames(tab) <- c("Dead", "Alive")
colnames(tab) <- c("Yes", "No")
names(dimnames(tab)) <- c("Outcome", "Smoker")

##Exercise 2.3

# Extend `tab` with marginal totals: a "Total" column holding the row sums,
# then a "Total" row holding the column sums (including the grand total).
rowt <- rowSums(tab)
colt <- colSums(cbind(tab, Total = rowt))
tab2 <- rbind(cbind(tab, Total = rowt), Total = colt)
# Re-attach the dimension labels, which are (re)set explicitly on the result.
names(dimnames(tab2)) <- c("Outcome", "Smoker")

#Exercise 2.4

# Marginal totals of the 2 x 2 table.
rowt <- rowSums(tab)
colt <- colSums(tab)

# Proportions of `tab`: within each row, within each column, and over the
# whole table.
rowd <- prop.table(tab, margin = 1)   # each row sums to 1
cold <- prop.table(tab, margin = 2)   # each column sums to 1
jtd <- prop.table(tab)                # all cells sum to 1

# Collect the three distributions in one named list.
ptabs <- list(
  row.distribution = rowd,
  col.distribution = cold,
  joint.distribution = jtd
)

#Exercise 2.5

# Risk of death per smoking group: deaths divided by the group total,
# both taken from the margin-augmented table `tab2`.
risk <- tab2["Dead", c("Yes", "No")] / tab2["Total", c("Yes", "No")]

# Ratios are expressed relative to the "No" (non-smoker) column, so the
# "No" entry of each ratio is 1.
risk.ratio <- risk / risk["No"]
odds <- risk / (1 - risk)
odds.ratio <- odds / odds["No"]

# Stack the four measures as rows; keep a copy rounded to two decimals.
results <- rbind(
  risk = risk,
  risk.ratio = risk.ratio,
  odds = odds,
  odds.ratio = odds.ratio
)
results.rounded <- round(results, 2)

##Interpretation: The risk of death among non-smokers is higher than the risk of death among smokers, suggesting that there may be some confounding.

##Exercise 2.6

##too much trouble loading

##Interpretation: The risk of death is not larger in non-smokers; in fact, it is larger among smokers in older age groups.

##Exercise 2.7

# Bind the label vector `Nonsmoker`, then evaluate `tab` as its own
# top-level expression so the table is displayed. The commented lines
# below reproduce that display.
Nonsmoker <- c("Yes", "No")
tab
##        Smoker
## Outcome Yes  No
##   Dead  139 230
##   Alive 443 502

##Exercise 2.8

# Download the homes CSV from GitHub into the data frame `hp`.
# as.is = TRUE keeps character columns as character rather than factor.
hp <- read.csv(
  file = "https://raw.githubusercontent.com/taragonmd/data/master/homes.csv",
  as.is = TRUE
)