library(foreign)
fhs <- read.dta("./fhs.2d92301d751b.dta")
library(Deducer)
Note: On Mac OS X we strongly recommend using iplots from within JGR.
Proceed at your own risk as iplots cannot resolve potential ev.loop deadlocks.
'Yes' is assumed for all dialogs as they cannot be shown without a deadlock,
also ievent.wait() is disabled.
## part 1
tab.part1 <- xtabs(~ I(bmi1 <= 25) + I(bmi2 <= 25), data = fhs)[2:1,2:1]
addmargins(tab.part1)
I(bmi2 <= 25)
I(bmi1 <= 25) TRUE FALSE Sum
TRUE 1492 278 1770
FALSE 249 1890 2139
Sum 1741 2168 3909
list.A.B.C <- list(A = 2139 / 3909,
B = 2168 / 3909,
C = 1741 / 3909
)
list.A.B.C
$A
[1] 0.5472
$B
[1] 0.5546
$C
[1] 0.4454
P(\( A \cap B \)) = 0.4835
P(\( A \cup B \)) = 1 - P(\( \bar A \cap \bar B \)) = 1 - 1890/3990 = 0.5165
\( P(B|A) = \frac {P(A \cap B)} {P(A)} \) = 0.8836
\( P(C|A) = \frac {P(A \cap C)} {P(A)} \) = 0.1164
## part 2
tab.part2 <- xtabs(~ I(cut(age1, c(30,40,50,60,71), right = FALSE)) +I(cursmoke1 == "Yes"), fhs)
tab.part2 <- round(prop.table(tab.part2), 4)
tab.part2
I(cursmoke1 == "Yes")
I(cut(age1, c(30, 40, 50, 60, 71), right = FALSE)) FALSE TRUE
[30,40) 0.0519 0.0742
[40,50) 0.1554 0.2262
[50,60) 0.1809 0.1346
[60,71) 0.1200 0.0568
sum(tab.part2[,2])
[1] 0.4918
sum(tab.part2[1:3,])
[1] 0.8232
sum(tab.part2[1:3,1])
[1] 0.3882
## part 3
sens.spec <- function(prevalence, sensitivity, specificity, total.number = 1) {
p <- prevalence
sens <- sensitivity
spec <- specificity
TP <- p * sens
FN <- p - TP
TN <- (1 - p) * spec
FP <- (1 - p) - TN
table <- matrix(c(TP,FP,FN,TN), ncol = 2, byrow = TRUE)
table <- table * total.number
dimnames(table) <- list(test = c("pos", "neg"),
disease = c("pos", "neg"))
PPV <- TP / (TP + FP)
NPV <- TN / (TN + FN)
LR.pos <- sens / (1 - spec)
LR.neg <- (1 - sens) / spec
probabilities <- c("PPV P(D+|T+)" = PPV, "P(D-|T+)" = 1 - PPV,
"NPV P(D-|T-)" = NPV, "P(D+|T-)" = 1 - NPV,
"LR+" = LR.pos, "LR-" = LR.neg)
list(table = stats::addmargins(table),
probabilities = probabilities
)
}
sens.spec(prevalence = 0.161, sensitivity = 0.721, specificity = 0.932)
$table
disease
test pos neg Sum
pos 0.11608 0.05705 0.1731
neg 0.04492 0.78195 0.8269
Sum 0.16100 0.83900 1.0000
$probabilities
PPV P(D+|T+) P(D-|T+) NPV P(D-|T-) P(D+|T-) LR+ LR-
0.67047 0.32953 0.94568 0.05432 10.60294 0.29936
Titanic <- expand.grid(list(SurvivalStatus = c("Survived","Did not survive"),
AgeSex = c("Child","Women","Man"),
PassengerClass = c("First","Second","Third")
)
)[,3:1]
Titanic$Frequency <- c(6,0,140,4,57,118,24,0,80,13,14,154,27,52,76,89,75,387)
Titanic
PassengerClass AgeSex SurvivalStatus Frequency
1 First Child Survived 6
2 First Child Did not survive 0
3 First Women Survived 140
4 First Women Did not survive 4
5 First Man Survived 57
6 First Man Did not survive 118
7 Second Child Survived 24
8 Second Child Did not survive 0
9 Second Women Survived 80
10 Second Women Did not survive 13
11 Second Man Survived 14
12 Second Man Did not survive 154
13 Third Child Survived 27
14 Third Child Did not survive 52
15 Third Women Survived 76
16 Third Women Did not survive 89
17 Third Man Survived 75
18 Third Man Did not survive 387
## Survival of all women, Survival of all children
p4.q1 <- xtabs(Frequency ~ AgeSex + SurvivalStatus, Titanic)
prop.table(p4.q1, 1)
SurvivalStatus
AgeSex Survived Did not survive
Child 0.5229 0.4771
Women 0.7363 0.2637
Man 0.1814 0.8186
## Survival of all women or children
colSums(prop.table(p4.q1[1:2,]))
Survived Did not survive
0.6908 0.3092
##
p4.q4 <- xtabs(Frequency ~ PassengerClass + SurvivalStatus, Titanic)
prop.table(p4.q4, 1)
SurvivalStatus
PassengerClass Survived Did not survive
First 0.6246 0.3754
Second 0.4140 0.5860
Third 0.2521 0.7479
## Part 5
fhs <- within(fhs, {
bmi1.cat <- cut(bmi1, c(-Inf, 18.5, 25, 30, Inf), right = F)
})
library(plyr)
table.part5 <- ddply(subset(fhs, !is.na(bmi1.cat)),
"bmi1.cat",
summarize,
NumberOfSubjects = length(bmi1.cat),
NumberOfDeath = sum(death == "Yes"),
TotalPersonYears = sum(timedth)
)
table.part5
bmi1.cat NumberOfSubjects NumberOfDeath TotalPersonYears
1 [-Inf,18.5) 57 18 1181
2 [18.5,25) 1936 571 40709
3 [25,30) 1845 689 37674
4 [30, Inf) 577 259 11309
c(bmi1.cat = "Total", colSums(table.part5[,-1]))
bmi1.cat NumberOfSubjects NumberOfDeath TotalPersonYears
"Total" "4415" "1537" "90873.1143052704"
within(table.part5, {
IncidenceRatePer1000PY = NumberOfDeath / TotalPersonYears * 1000
Risk = NumberOfDeath / NumberOfSubjects
})
bmi1.cat NumberOfSubjects NumberOfDeath TotalPersonYears Risk IncidenceRatePer1000PY
1 [-Inf,18.5) 57 18 1181 0.3158 15.24
2 [18.5,25) 1936 571 40709 0.2949 14.03
3 [25,30) 1845 689 37674 0.3734 18.29
4 [30, Inf) 577 259 11309 0.4489 22.90
## part 6
fhs <- within(fhs, {
underwt <- as.numeric(bmi1 < 18.5)
})
ddply(fhs,
"underwt",
summarize,
anychd_IncidenceRatePer1000PY = sum(anychd == "Yes") / sum(timechd) * 1000
)
underwt anychd_IncidenceRatePer1000PY
1 0 15.444
2 1 5.359
3 NA 21.710
ddply(fhs,
"bmi1.cat",
summarize,
anychd_IncidenceRatePer1000PY = sum(anychd == "Yes") / sum(timechd) * 1000
)
bmi1.cat anychd_IncidenceRatePer1000PY
1 [-Inf,18.5) 5.359
2 [18.5,25) 11.119
3 [25,30) 18.136
4 [30, Inf) 23.222
5 <NA> 21.710
## part 7
fhs <- within(fhs, {
highbp1 <- NA
highbp1[sysbp1 >= 140 | diabp1 >= 90] <- 1
highbp1[sysbp1 < 140 & diabp1 < 90] <- 0
})
ddply(fhs,
"highbp1",
summarize,
anychd_IncidenceRatePer1000PY = sum(anychd == "Yes") / sum(timechd) * 1000
)
highbp1 anychd_IncidenceRatePer1000PY
1 0 11.47
2 1 23.69