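Q2.2 The R data objects are: Vector: elements of the same mode, with no dimensions. Matrix: elements of the same mode arranged in two dimensions. Array: elements of the same mode arranged in n dimensions. List: a collection of data objects ("bins") with no restrictions on what each one holds. Data frame: a list with two dimensions (equal-length columns). Function: an object that performs a task.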

Q2.3 Ways to index: 1. by name 2. by position 3. by logical vector

Q2.4 Indexed elements can be replaced by assigning new values to the indexed positions.

Q2.5

# Vital status (rows) by smoking status (columns); counts are entered one
# table row at a time.
deadalive <- rbind(c(139, 230), c(443, 502))
dimnames(deadalive) <- list(
  "Vital Status" = c("Dead", "Alive"),
  "Smoking" = c("Yes", "No")
)
deadalive
##             Smoking
## Vital Status Yes  No
##        Dead  139 230
##        Alive 443 502

Q2.6

# Same 2x2 counts with the row and column totals appended as a third row and
# a third column; values are laid out below exactly as the table reads.
smoking <- matrix(
  c(139, 230,  369,
    443, 502,  945,
    582, 732, 1314),
  nrow = 3, ncol = 3, byrow = TRUE,
  dimnames = list(
    "Vital Status" = c("Dead", "Alive", "Total"),
    "Smoking" = c("yes", "no", "total")
  )
)
smoking
##             Smoking
## Vital Status yes  no total
##        Dead  139 230   369
##        Alive 443 502   945
##        Total 582 732  1314

Q2.7 Use the sweep and apply functions to calculate marginal and joint distributions.

# Row and column distributions of the smoking table.
#
# `smoking` already stores a "Total" row and a "total" column, so summing a
# whole row (or column) with apply() counts every cell twice and halves every
# proportion. Divide by the stored margins instead so that each distribution
# sums to 1.

# Distribution of smoking status within each vital-status row.
rowdist <- sweep(smoking, 1, smoking[, "total"], "/")
rowdist
##             Smoking
## Vital Status    yes     no total
##        Dead  0.3767 0.6233     1
##        Alive 0.4688 0.5312     1
##        Total 0.4429 0.5571     1
# Distribution of vital status within each smoking-status column.
coldist <- sweep(smoking, 2, smoking["Total", ], "/")
coldist
##             Smoking
## Vital Status    yes     no  total
##        Dead  0.2388 0.3142 0.2808
##        Alive 0.7612 0.6858 0.7192
##        Total 1.0000 1.0000 1.0000

Q2.8 Recreate Table 2.41 and interpret the results.

# Risk, risk ratio, odds and odds ratio of death by smoking status.
#
# Risks are computed directly from the counts (deaths / group total) so they
# do not depend on how any column-distribution table was normalised.
riskyes <- smoking["Dead", "yes"] / smoking["Total", "yes"]
riskno <- smoking["Dead", "no"] / smoking["Total", "no"]
risks <- cbind(riskyes, riskno)
risks
##      riskyes riskno
## [1,]  0.2388 0.3142
# Non-smokers are the reference group, hence the 1 in the "No" column of both
# ratio rows.
ratios <- cbind(riskyes / riskno, riskno / riskno)
# Odds of death = dead / alive within each smoking group.
oddsyes <- smoking["Dead", "yes"] / smoking["Alive", "yes"]
oddsno <- smoking["Dead", "no"] / smoking["Alive", "no"]
odds <- cbind(oddsyes, oddsno)
oddsratio <- cbind(oddsyes / oddsno, oddsno / oddsno)
table3 <- rbind(risks, ratios, odds, oddsratio)
dimnames(table3) <- list(
  "Measure" = c("Risk", "Risk Ratio", "Odds", "Odds Ratio"),
  "Smoking" = c("Yes", "No")
)
table3
##             Smoking
## Measure         Yes     No
##   Risk       0.2388 0.3142
##   Risk Ratio 0.7601 1.0000
##   Odds       0.3138 0.4582
##   Odds Ratio 0.6848 1.0000

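The risk of dying during the follow-up period among smokers is 0.76 times the risk among non-smokers. Likewise, the odds of death among smokers are 0.68 times the odds among non-smokers. These are crude (unstratified) measures, so they do not account for differences in age between the two groups.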

Q2.9

# Age-stratified death risk for smokers and non-smokers (Whickham data).
# One record per subject: vital status, smoking status and age group.
wdat <- read.csv("http://www.medepi.net/data/whickham.txt", header = TRUE)
str(wdat)
## 'data.frame':    1314 obs. of  3 variables:
##  $ Vital.Status: Factor w/ 2 levels "Alive","Dead": 2 2 1 1 1 1 1 1 1 1 ...
##  $ Smoking     : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Age         : Factor w/ 7 levels "18-24","25-34",..: 1 1 1 1 1 1 1 1 1 1 ...
# Vital.Status x Smoking x Age array of counts.
table4 <- xtabs(~ Vital.Status + Smoking + Age, data = wdat)
# Vital.Status x Age counts, summed over smoking status.
totals <- apply(table4, c(1, 3), sum)
# Flat layout: one row per Vital.Status/Smoking combination, in the order
# Alive/No, Alive/Yes, Dead/No, Dead/Yes; one column per age group.
table5 <- ftable(table4)
# Rows 5 and 6 are the Alive and Dead totals by age group.
table6 <- rbind(table5, totals)
# Death risk among smokers by age group: dead smokers (row 4) over all
# smokers (rows 2 + 4). Dividing by row 6 (all deaths in the age group) would
# give the share of deaths that were smokers, which is not a risk.
smodeathrisk <- unname(table6[4, ] / (table6[2, ] + table6[4, ]))
# Age/Vital.Status rows by Smoking columns, as proportions within each row.
# NOTE: this reuses the name `coldist` and replaces any earlier object so named.
table8 <- aperm(table4, c(3, 1, 2))
table8 <- ftable(table8)
coldist <- sweep(table8, 1, apply(table8, 1, sum), "/")
# Age/Smoking rows by Vital.Status columns, as proportions within each row,
# i.e. the proportion alive and dead in every age-by-smoking stratum.
# NOTE: this reuses the name `rowdist` and replaces any earlier object so named.
table10 <- aperm(table4, c(3, 2, 1))
table10 <- ftable(table10)
rowdist <- sweep(table10, 1, apply(table10, 1, sum), "/")
# Plain numeric matrix copy of the proportions, used for indexing below.
risk_matrix <- matrix(rowdist, 14, 2, byrow = FALSE)
risk_matrix
##         [,1]    [,2]
##  [1,] 0.9839 0.01613
##  [2,] 0.9636 0.03636
##  [3,] 0.9682 0.03185
##  [4,] 0.9758 0.02419
##  [5,] 0.9421 0.05785
##  [6,] 0.8716 0.12844
##  [7,] 0.8462 0.15385
##  [8,] 0.7923 0.20769
##  [9,] 0.6694 0.33058
## [10,] 0.5565 0.44348
## [11,] 0.2171 0.78295
## [12,] 0.1944 0.80556
## [13,] 0.0000 1.00000
## [14,] 0.0000 1.00000
# Within each age group the non-smoker row (odd) comes before the smoker row
# (even); column 2 holds the proportion dead.
age_rows <- seq(1, nrow(risk_matrix), by = 2)
deathriskno <- matrix(risk_matrix[age_rows, 2])
deathrisksmo <- matrix(risk_matrix[age_rows + 1, 2])
deathriskratio <- deathrisksmo / deathriskno
table11 <- cbind(deathrisksmo, deathriskno, deathriskratio)
rownames(table11) <- c("18-24", "25-34", "35-44", "45-54", "55-64", "65-74", "75+")
colnames(table11) <- c("Death risk, smokers", "Death risk, non-smokers", "Risk ratio")
table11
##       Death risk, smokers Death risk, non-smokers Risk ratio
## 18-24             0.03636                 0.01613     2.2545
## 25-34             0.02419                 0.03185     0.7597
## 35-44             0.12844                 0.05785     2.2202
## 45-54             0.20769                 0.15385     1.3500
## 55-64             0.44348                 0.33058     1.3415
## 65-74             0.80556                 0.78295     1.0289
## 75+               1.00000                 1.00000     1.0000

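The smoker-to-non-smoker risk ratio moves toward 1 in the older age groups and equals 1 at 75+, where everyone in both groups died. In most age groups smokers have the higher risk of death (25-34 is the exception, with a risk ratio of 0.76), and the relative effect of smoking is largest in the younger groups, where the baseline risk of death is low.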

Q2.10 Evaluate the structure of the data frame. Create a 3-dimensional array using both the table and xtabs functions. Now attach the std data frame using the attach function. Create the same 3-dimensional array using both the table and xtabs functions.

# Read the syphilis case files.
#
# Both files are comma-separated with a header line. With read.table()'s
# defaults (whitespace separator, header = FALSE) each record collapses into
# two columns, V1 and V2, where V2 holds the remaining fields as one
# comma-joined string, and the header line is read as an observation.
# read.csv() splits on commas and takes the column names from the first line.
syph <- read.csv("http://www.medepi.net/data/syphilis89c.txt", header = TRUE)
str(syph)
syph2 <- read.csv("http://www.medepi.net/data/syphilis89b.txt", header = TRUE)
# First record, second column (a data value now, no longer the header text).
syph2[1, 2]
## (echoed output not shown: re-run the chunk to regenerate it with the
## corrected reader)