PH207x Week 3

Loading data and Deducer

library(foreign)
fhs <- read.dta("./fhs.2d92301d751b.dta")

library(Deducer)
Note: On Mac OS X we strongly recommend using iplots from within JGR.
Proceed at your own risk as iplots cannot resolve potential ev.loop deadlocks.
'Yes' is assumed for all dialogs as they cannot be shown without a deadlock,
also ievent.wait() is disabled.

Homework 3

## part 1
tab.part1 <- xtabs(~ I(bmi1 <= 25) + I(bmi2 <= 25), data = fhs)[2:1,2:1]
addmargins(tab.part1)
             I(bmi2 <= 25)
I(bmi1 <= 25) TRUE FALSE  Sum
        TRUE  1492   278 1770
        FALSE  249  1890 2139
        Sum   1741  2168 3909

list.A.B.C <- list(A = 2139 / 3909,
                   B = 2168 / 3909,
                   C = 1741 / 3909
                   )
list.A.B.C
$A
[1] 0.5472

$B
[1] 0.5546

$C
[1] 0.4454

P(\( A \cap B \)) = 0.4835

P(\( A \cup B \)) = 1 - P(\( \bar A \cap \bar B \)) = 1 - 1890/3990 = 0.5165

\( P(B|A) = \frac {P(A \cap B)} {P(A)} \) = 0.8836

\( P(C|A) = \frac {P(A \cap C)} {P(A)} \) = 0.1164

## part 2
tab.part2 <- xtabs(~ I(cut(age1, c(30,40,50,60,71), right = FALSE)) +I(cursmoke1 == "Yes"), fhs)

tab.part2 <- round(prop.table(tab.part2), 4)

tab.part2
                                                  I(cursmoke1 == "Yes")
I(cut(age1, c(30, 40, 50, 60, 71), right = FALSE))  FALSE   TRUE
                                           [30,40) 0.0519 0.0742
                                           [40,50) 0.1554 0.2262
                                           [50,60) 0.1809 0.1346
                                           [60,71) 0.1200 0.0568

sum(tab.part2[,2])
[1] 0.4918

sum(tab.part2[1:3,])
[1] 0.8232

sum(tab.part2[1:3,1])
[1] 0.3882
## part 3
sens.spec <- function(prevalence, sensitivity, specificity, total.number = 1) {
    p <- prevalence
    sens <- sensitivity
    spec <- specificity

    TP <- p * sens
    FN <- p - TP
    TN <- (1 - p) * spec
    FP <- (1 - p) - TN

    table <- matrix(c(TP,FP,FN,TN), ncol = 2, byrow = TRUE)
    table <- table * total.number

    dimnames(table) <- list(test = c("pos", "neg"),
                            disease = c("pos", "neg"))

    PPV <- TP / (TP + FP)
    NPV <- TN / (TN + FN)
    LR.pos <- sens / (1 - spec)
    LR.neg <- (1 - sens) / spec

    probabilities <- c("PPV P(D+|T+)" = PPV, "P(D-|T+)" = 1 - PPV,
                       "NPV P(D-|T-)" = NPV, "P(D+|T-)" = 1 - NPV,
                       "LR+" = LR.pos, "LR-" = LR.neg)

    list(table = stats::addmargins(table),
         probabilities = probabilities
         )
}

sens.spec(prevalence = 0.161, sensitivity = 0.721, specificity = 0.932)
$table
     disease
test      pos     neg    Sum
  pos 0.11608 0.05705 0.1731
  neg 0.04492 0.78195 0.8269
  Sum 0.16100 0.83900 1.0000

$probabilities
PPV P(D+|T+)     P(D-|T+) NPV P(D-|T-)     P(D+|T-)          LR+          LR- 
     0.67047      0.32953      0.94568      0.05432     10.60294      0.29936 

Titanic <- expand.grid(list(SurvivalStatus = c("Survived","Did not survive"),
                 AgeSex = c("Child","Women","Man"),
                 PassengerClass = c("First","Second","Third")
                 )
            )[,3:1]
Titanic$Frequency <- c(6,0,140,4,57,118,24,0,80,13,14,154,27,52,76,89,75,387)

Titanic
   PassengerClass AgeSex  SurvivalStatus Frequency
1           First  Child        Survived         6
2           First  Child Did not survive         0
3           First  Women        Survived       140
4           First  Women Did not survive         4
5           First    Man        Survived        57
6           First    Man Did not survive       118
7          Second  Child        Survived        24
8          Second  Child Did not survive         0
9          Second  Women        Survived        80
10         Second  Women Did not survive        13
11         Second    Man        Survived        14
12         Second    Man Did not survive       154
13          Third  Child        Survived        27
14          Third  Child Did not survive        52
15          Third  Women        Survived        76
16          Third  Women Did not survive        89
17          Third    Man        Survived        75
18          Third    Man Did not survive       387

## Survival of all women, Survival of all children
p4.q1 <- xtabs(Frequency ~ AgeSex + SurvivalStatus, Titanic)
prop.table(p4.q1, 1)
       SurvivalStatus
AgeSex  Survived Did not survive
  Child   0.5229          0.4771
  Women   0.7363          0.2637
  Man     0.1814          0.8186

## Survival of all women or children
colSums(prop.table(p4.q1[1:2,]))
       Survived Did not survive 
         0.6908          0.3092 

##
p4.q4 <- xtabs(Frequency ~ PassengerClass + SurvivalStatus, Titanic)
prop.table(p4.q4, 1)
              SurvivalStatus
PassengerClass Survived Did not survive
        First    0.6246          0.3754
        Second   0.4140          0.5860
        Third    0.2521          0.7479
## Part 5

fhs <- within(fhs, {
    bmi1.cat <- cut(bmi1, c(-Inf, 18.5, 25, 30, Inf), right = F)
})

library(plyr)

table.part5 <- ddply(subset(fhs, !is.na(bmi1.cat)),
                     "bmi1.cat",
                     summarize,
                     NumberOfSubjects = length(bmi1.cat),
                     NumberOfDeath = sum(death == "Yes"),
                     TotalPersonYears = sum(timedth)
                     )

table.part5
     bmi1.cat NumberOfSubjects NumberOfDeath TotalPersonYears
1 [-Inf,18.5)               57            18             1181
2   [18.5,25)             1936           571            40709
3     [25,30)             1845           689            37674
4   [30, Inf)              577           259            11309
c(bmi1.cat = "Total", colSums(table.part5[,-1]))
          bmi1.cat   NumberOfSubjects      NumberOfDeath   TotalPersonYears 
           "Total"             "4415"             "1537" "90873.1143052704" 

within(table.part5, {
    IncidenceRatePer1000PY = NumberOfDeath / TotalPersonYears * 1000
    Risk = NumberOfDeath / NumberOfSubjects    
})
     bmi1.cat NumberOfSubjects NumberOfDeath TotalPersonYears   Risk IncidenceRatePer1000PY
1 [-Inf,18.5)               57            18             1181 0.3158                  15.24
2   [18.5,25)             1936           571            40709 0.2949                  14.03
3     [25,30)             1845           689            37674 0.3734                  18.29
4   [30, Inf)              577           259            11309 0.4489                  22.90



## part 6
fhs <- within(fhs, {
    underwt <- as.numeric(bmi1 < 18.5)
})

ddply(fhs,
      "underwt",
      summarize,
      anychd_IncidenceRatePer1000PY = sum(anychd == "Yes") / sum(timechd) * 1000
      )
  underwt anychd_IncidenceRatePer1000PY
1       0                        15.444
2       1                         5.359
3      NA                        21.710

ddply(fhs,
      "bmi1.cat",
      summarize,
      anychd_IncidenceRatePer1000PY = sum(anychd == "Yes") / sum(timechd) * 1000
      )
     bmi1.cat anychd_IncidenceRatePer1000PY
1 [-Inf,18.5)                         5.359
2   [18.5,25)                        11.119
3     [25,30)                        18.136
4   [30, Inf)                        23.222
5        <NA>                        21.710


## part 7
fhs <- within(fhs, {
    highbp1 <- NA
    highbp1[sysbp1 >= 140 | diabp1 >= 90] <- 1
    highbp1[sysbp1  < 140 & diabp1  < 90] <- 0
})


ddply(fhs,
      "highbp1",
      summarize,
      anychd_IncidenceRatePer1000PY = sum(anychd == "Yes") / sum(timechd) * 1000
      )
  highbp1 anychd_IncidenceRatePer1000PY
1       0                         11.47
2       1                         23.69