Consider the survival data given in Exercise Table 2.1 and compute and plot the estimated survivorship, the probability density and the hazard functions.
Initial_Time | End_Time | Number_Survivors_Initial | Number_Dying_in_Interval |
0 | 1 | 1,100 | 240 |
1 | 2 | 860 | 180 |
2 | 3 | 680 | 184 |
3 | 4 | 496 | 138 |
4 | 5 | 358 | 118 |
5 | 6 | 240 | 60 |
6 | 7 | 180 | 52 |
7 | 8 | 128 | 44 |
8 | 9 | 84 | 32 |
9 | 52 | 28 |
Survivorship is the probability that an individual survives longer than t. It is estimated with the following equation:
\[\hat{S}(t) = \frac{number\;of\;items\;surviving\;longer\;than\;t}{total\;number\;of\;patients}\]
data.2.1 <- data.2.1 %>%
mutate('S(t)' = Number_Survivors_Initial/max(Number_Survivors_Initial))
data.2.1 %>%
select(Initial_Time, "S(t)") %>%
flextable() %>%
autofit()
Initial_Time | S(t) |
0 | 1.00000000 |
1 | 0.78181818 |
2 | 0.61818182 |
3 | 0.45090909 |
4 | 0.32545455 |
5 | 0.21818182 |
6 | 0.16363636 |
7 | 0.11636364 |
8 | 0.07636364 |
9 | 0.04727273 |
ggplot(data.2.1, aes(`Initial_Time`, `S(t)`)) +
geom_line() +
geom_point() +
ylab(expression(paste(hat("S"), "(t)"))) +
xlab("t")
To estimate the probability density the following equation is used:
\[\hat{f}(t) = \frac{number\;of\;items\;dying\;in\;the\;interval\;beginning\;at\; time \;t}{(total\;number\;of\;items)*(interval\; width)}\] This probability density is at the mid point of the interval
data.2.1 <- data.2.1 %>%
mutate(Mid_Time = (Initial_Time + End_Time)/2) %>%
mutate("f(t)" = Number_Dying_in_Interval/(max(Number_Survivors_Initial) * (End_Time - Initial_Time)))
data.2.1 %>%
select(Mid_Time, "f(t)") %>%
flextable() %>%
autofit()
Mid_Time | f(t) |
0.5 | 0.21818182 |
1.5 | 0.16363636 |
2.5 | 0.16727273 |
3.5 | 0.12545455 |
4.5 | 0.10727273 |
5.5 | 0.05454545 |
6.5 | 0.04727273 |
7.5 | 0.04000000 |
8.5 | 0.02909091 |
ggplot(data.2.1, aes(`Mid_Time`, `f(t)`)) +
geom_line() +
geom_point() +
ylab(expression(paste(hat("f"), "(t)"))) +
xlab("t")
To estimate the hazard function the following equation can be used:
\[\hat{h}(t) = \frac{number\;of\;items\;dying\;per\;unit\;time\;in\;the\; interval}{(number\;of\;items\;surviving \; at\; t)-(number\;of\;deaths\;in\;the\;interval)/2}\]
data.2.1 <- data.2.1 %>%
mutate(Deaths_Per_Time = Number_Dying_in_Interval/(End_Time - Initial_Time),
"h(t)" = Deaths_Per_Time/(Number_Survivors_Initial - Number_Dying_in_Interval/2))
data.2.1 %>%
select(Mid_Time, "h(t)") %>%
flextable() %>%
autofit()
Mid_Time | h(t) |
0.5 | 0.2448980 |
1.5 | 0.2337662 |
2.5 | 0.3129252 |
3.5 | 0.3231850 |
4.5 | 0.3946488 |
5.5 | 0.2857143 |
6.5 | 0.3376623 |
7.5 | 0.4150943 |
8.5 | 0.4705882 |
ggplot(data.2.1, aes(`Mid_Time`, `h(t)`)) +
geom_line() +
geom_point() +
ylab(expression(paste(hat("h"), "(t)"))) +
xlab("t")
Table 2.2 is a life table for the total population of 100,000 live births in the United States, 1959 – 1961. Compute and plot the estimated survivorship, the probability density, and the hazard function.
Initial_Time | End_Time | Number_Survivors_Initial | Number_Dying_in_Interval |
0 | 1 | 100,000 | 2,593 |
1 | 5 | 97,407 | 409 |
5 | 10 | 96,998 | 233 |
10 | 15 | 96,765 | 214 |
15 | 20 | 96,551 | 440 |
20 | 25 | 96,111 | 594 |
25 | 30 | 95,517 | 612 |
30 | 35 | 94,905 | 761 |
35 | 40 | 94,144 | 1,080 |
40 | 45 | 93,064 | 1,686 |
45 | 50 | 91,378 | 2,622 |
50 | 55 | 88,756 | 4,045 |
55 | 60 | 84,711 | 5,644 |
60 | 65 | 79,067 | 7,920 |
65 | 70 | 71,147 | 10,290 |
70 | 75 | 60,857 | 12,687 |
75 | 80 | 48,170 | 14,594 |
80 | 85 | 33,576 | 15,034 |
85 | 18,542 | 18,542 |
data.2.2 <- data.2.2 %>%
mutate('S(t)' = Number_Survivors_Initial/max(Number_Survivors_Initial))
data.2.2 %>%
select(Initial_Time, "S(t)") %>%
flextable() %>%
autofit()
Initial_Time | S(t) |
0 | 1.00000 |
1 | 0.97407 |
5 | 0.96998 |
10 | 0.96765 |
15 | 0.96551 |
20 | 0.96111 |
25 | 0.95517 |
30 | 0.94905 |
35 | 0.94144 |
40 | 0.93064 |
45 | 0.91378 |
50 | 0.88756 |
55 | 0.84711 |
60 | 0.79067 |
65 | 0.71147 |
70 | 0.60857 |
75 | 0.48170 |
80 | 0.33576 |
85 | 0.18542 |
ggplot(data.2.2, aes(`Initial_Time`, `S(t)`)) +
geom_line() +
geom_point() +
ylab(expression(paste(hat("S"), "(t)"))) +
xlab("t")
data.2.2 <- data.2.2 %>%
mutate(Mid_Time = (Initial_Time + End_Time)/2) %>%
mutate("f(t)" = Number_Dying_in_Interval/(max(Number_Survivors_Initial) * (End_Time - Initial_Time)))
data.2.2 %>%
select(Mid_Time, "f(t)") %>%
flextable() %>%
autofit()
Mid_Time | f(t) |
0.5 | 0.0259300 |
3.0 | 0.0010225 |
7.5 | 0.0004660 |
12.5 | 0.0004280 |
17.5 | 0.0008800 |
22.5 | 0.0011880 |
27.5 | 0.0012240 |
32.5 | 0.0015220 |
37.5 | 0.0021600 |
42.5 | 0.0033720 |
47.5 | 0.0052440 |
52.5 | 0.0080900 |
57.5 | 0.0112880 |
62.5 | 0.0158400 |
67.5 | 0.0205800 |
72.5 | 0.0253740 |
77.5 | 0.0291880 |
82.5 | 0.0300680 |
ggplot(data.2.2, aes(`Mid_Time`, `f(t)`)) +
geom_line() +
geom_point() +
ylab(expression(paste(hat("f"), "(t)"))) +
xlab("t")
data.2.2 <- data.2.2 %>%
mutate(Deaths_Per_Time = Number_Dying_in_Interval/(End_Time - Initial_Time),
"h(t)" = Deaths_Per_Time/(Number_Survivors_Initial - Number_Dying_in_Interval/2))
data.2.2 %>%
select(Mid_Time, "h(t)") %>%
flextable() %>%
autofit()
Mid_Time | h(t) |
0.5 | 0.0262705983 |
3.0 | 0.0010519277 |
7.5 | 0.0004810000 |
12.5 | 0.0004427983 |
17.5 | 0.0009135169 |
22.5 | 0.0012399023 |
27.5 | 0.0012855657 |
32.5 | 0.0016101646 |
37.5 | 0.0023075937 |
42.5 | 0.0036564340 |
47.5 | 0.0058223323 |
52.5 | 0.0093274225 |
57.5 | 0.0137845132 |
62.5 | 0.0210899117 |
67.5 | 0.0311808733 |
72.5 | 0.0465462684 |
77.5 | 0.0714114452 |
82.5 | 0.1153843202 |
ggplot(data.2.2, aes(`Mid_Time`, `h(t)`)) +
geom_line() +
geom_point() +
ylab(expression(paste(hat("h"), "(t)"))) +
xlab("t")