library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ipumsr)
library(haven)
library(survival)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(survminer)
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
##
## The following object is masked from 'package:survival':
##
## myeloma
library(ggplot2)
library(ggpubr)
library(muhaz)
library(carData)
library(dplyr)
** Read IPUMS NHIS data ** # I select data from 1990 to 2017
#setwd("C:\\Users\\shahi\\OneDrive\\Desktop\\Fall'22\\Dem 7223 (Event history)\\Assignments\\Assignment 2")
nhis <- read_ipums_ddi("nhis_00006.xml")
nhis_dat <- read_ipums_micro(nhis)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
nhis_dat <- haven::zap_labels(nhis_dat)
** Recode variable **
nhis_dat$MARST<- as.numeric(nhis_dat$MARST)
nhis_dat$MARST <- car:: Recode(nhis_dat$MARST, recodes= "11:40= 'married/ever married'; 50= 'unmarried'; else= NA", as.factor= T)
nhis_dat <- nhis_dat %>%
filter(MORTELIG == 1)
nhis_dat <- nhis_dat %>%
filter(SEX == 2)
nhis_dat <- nhis_dat%>%
filter(complete.cases(.))
#Event - All Cause Mortality for Single Woman. #Time Variable - The current year of mortality follow up is 2017, that is the censoring year for mortality #Censoring Indicator - If the respondent died within 5 years of the survey, they are coded as experiencing the event, they are otherwise censored.
nhis_dat <- nhis_dat %>%
mutate(death_age = ifelse( MORTSTAT ==1,
MORTDODY - (YEAR - AGE),
2017 - (YEAR - AGE)),
d.event = ifelse(MORTSTAT == 1, 1, 0))
library(survival)
age_fit <- survfit(Surv(death_age, d.event) ~ 1,
data = nhis_dat)
library(ggsurvfit)
age_fit %>%
ggsurvfit() +
add_confidence_interval(type = "ribbon") +
add_quantile()
** Estimating survival time functions **
head(nhis_dat[,c("death_age","d.event")], n=20)
## # A tibble: 20 × 2
## death_age d.event
## <dbl> <dbl>
## 1 53 0
## 2 72 0
## 3 54 0
## 4 56 0
## 5 64 0
## 6 41 0
## 7 75 1
## 8 36 1
## 9 73 0
## 10 43 0
## 11 47 0
## 12 58 0
## 13 46 0
## 14 50 0
## 15 45 0
## 16 38 0
## 17 65 1
## 18 46 0
## 19 47 0
## 20 98 0
head(Surv(nhis_dat$death_age, nhis_dat$d.event), n=20)
## [1] 53+ 72+ 54+ 56+ 64+ 41+ 75 36 73+ 43+ 47+ 58+ 46+ 50+ 45+ 38+ 65 46+ 47+
## [20] 98+
** 5a. Kaplan-Meier survival analysis of the outcome **
nhis_dat <- nhis_dat %>%
mutate(death_time = ifelse( MORTSTAT ==1,
MORTDODY - YEAR ,
2017 - YEAR ),
d5.event = ifelse(MORTSTAT == 1 & death_time <= 5, 1, 0))
library(survival)
time_fit <- survfit(Surv(death_time, d5.event) ~ 1,
conf.type = "log",
data = nhis_dat)
library(ggsurvfit)
time_fit %>%
ggsurvfit()+
xlim(-1, 6) +
add_censor_mark() +
add_confidence_interval()+
add_quantile(y_value = .975)
## Warning: Removed 16 row(s) containing missing values (geom_path).
summary(age_fit)
## Call: survfit(formula = Surv(death_age, d.event) ~ 1, data = nhis_dat)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 18 653351 4 0.99999 3.06e-06 0.99999 1.0000
## 19 653196 8 0.99998 5.30e-06 0.99997 1.0000
## 20 652805 10 0.99997 7.18e-06 0.99995 1.0000
## 21 652225 18 0.99994 9.69e-06 0.99992 1.0000
## 22 650998 23 0.99990 1.22e-05 0.99988 0.9999
## 23 649215 29 0.99986 1.47e-05 0.99983 0.9999
## 24 646693 29 0.99981 1.69e-05 0.99978 0.9998
## 25 643534 41 0.99975 1.96e-05 0.99971 0.9998
## 26 639494 42 0.99968 2.21e-05 0.99964 0.9997
## 27 634852 45 0.99961 2.45e-05 0.99957 0.9997
## 28 629673 62 0.99952 2.75e-05 0.99946 0.9996
## 29 624078 71 0.99940 3.06e-05 0.99934 0.9995
## 30 617907 100 0.99924 3.46e-05 0.99917 0.9993
## 31 611053 99 0.99908 3.82e-05 0.99900 0.9992
## 32 603573 80 0.99895 4.10e-05 0.99887 0.9990
## 33 595624 125 0.99874 4.51e-05 0.99865 0.9988
## 34 587100 118 0.99854 4.87e-05 0.99844 0.9986
## 35 577952 127 0.99832 5.25e-05 0.99821 0.9984
## 36 568168 164 0.99803 5.71e-05 0.99792 0.9981
## 37 557573 181 0.99770 6.19e-05 0.99758 0.9978
## 38 546431 178 0.99738 6.65e-05 0.99725 0.9975
## 39 535084 212 0.99698 7.18e-05 0.99684 0.9971
## 40 523798 217 0.99657 7.71e-05 0.99642 0.9967
## 41 512431 199 0.99618 8.18e-05 0.99602 0.9963
## 42 501178 240 0.99571 8.73e-05 0.99554 0.9959
## 43 489782 266 0.99517 9.34e-05 0.99498 0.9953
## 44 478463 309 0.99452 1.00e-04 0.99433 0.9947
## 45 466871 326 0.99383 1.07e-04 0.99362 0.9940
## 46 454913 388 0.99298 1.15e-04 0.99275 0.9932
## 47 442406 360 0.99217 1.23e-04 0.99193 0.9924
## 48 429824 429 0.99118 1.32e-04 0.99092 0.9914
## 49 417727 485 0.99003 1.42e-04 0.98975 0.9903
## 50 405742 477 0.98887 1.51e-04 0.98857 0.9892
## 51 393562 565 0.98745 1.62e-04 0.98713 0.9878
## 52 381104 532 0.98607 1.73e-04 0.98573 0.9864
## 53 368311 637 0.98436 1.85e-04 0.98400 0.9847
## 54 355087 684 0.98247 1.99e-04 0.98208 0.9829
## 55 341602 705 0.98044 2.12e-04 0.98002 0.9809
## 56 328309 788 0.97809 2.28e-04 0.97764 0.9785
## 57 314784 807 0.97558 2.44e-04 0.97510 0.9761
## 58 301512 817 0.97294 2.60e-04 0.97243 0.9734
## 59 288446 900 0.96990 2.78e-04 0.96936 0.9704
## 60 275289 882 0.96679 2.96e-04 0.96621 0.9674
## 61 261919 966 0.96323 3.17e-04 0.96261 0.9638
## 62 249150 1022 0.95928 3.39e-04 0.95861 0.9599
## 63 236758 1078 0.95491 3.62e-04 0.95420 0.9556
## 64 224734 1110 0.95019 3.87e-04 0.94943 0.9510
## 65 212845 1089 0.94533 4.12e-04 0.94452 0.9461
## 66 201392 1217 0.93962 4.41e-04 0.93875 0.9405
## 67 190240 1252 0.93343 4.72e-04 0.93251 0.9344
## 68 179272 1370 0.92630 5.06e-04 0.92531 0.9273
## 69 168549 1303 0.91914 5.39e-04 0.91808 0.9202
## 70 158077 1418 0.91089 5.77e-04 0.90976 0.9120
## 71 147484 1491 0.90169 6.19e-04 0.90047 0.9029
## 72 138719 1589 0.89136 6.64e-04 0.89006 0.8927
## 73 130348 1636 0.88017 7.11e-04 0.87878 0.8816
## 74 122135 1680 0.86806 7.60e-04 0.86658 0.8696
## 75 113914 1781 0.85449 8.13e-04 0.85290 0.8561
## 76 106296 1855 0.83958 8.70e-04 0.83788 0.8413
## 77 99085 1922 0.82329 9.29e-04 0.82148 0.8251
## 78 92176 2016 0.80529 9.91e-04 0.80335 0.8072
## 79 85578 2067 0.78584 1.06e-03 0.78377 0.7879
## 80 79131 2197 0.76402 1.12e-03 0.76182 0.7662
## 81 72914 2351 0.73938 1.20e-03 0.73704 0.7417
## 82 66909 2474 0.71204 1.27e-03 0.70955 0.7145
## 83 60943 2507 0.68275 1.35e-03 0.68012 0.6854
## 84 55271 2634 0.65022 1.43e-03 0.64743 0.6530
## 85 49822 3252 0.60778 1.51e-03 0.60481 0.6108
## 86 43426 3891 0.55332 1.61e-03 0.55017 0.5565
## 87 36808 3855 0.49537 1.69e-03 0.49206 0.4987
## 88 30482 3486 0.43872 1.75e-03 0.43530 0.4422
## 89 24700 3222 0.38149 1.79e-03 0.37800 0.3850
## 90 19569 2843 0.32606 1.81e-03 0.32255 0.3296
## 91 15249 2426 0.27419 1.80e-03 0.27069 0.2777
## 92 11680 2028 0.22658 1.77e-03 0.22314 0.2301
## 93 8778 1643 0.18417 1.72e-03 0.18083 0.1876
## 94 6380 1234 0.14855 1.66e-03 0.14533 0.1518
## 95 4591 938 0.11820 1.59e-03 0.11513 0.1214
## 96 3216 629 0.09508 1.52e-03 0.09214 0.0981
## 97 2264 502 0.07400 1.45e-03 0.07122 0.0769
## 98 1534 342 0.05750 1.37e-03 0.05487 0.0603
## 99 1006 189 0.04670 1.32e-03 0.04418 0.0494
## 100 661 119 0.03829 1.29e-03 0.03585 0.0409
## 101 437 67 0.03242 1.27e-03 0.03002 0.0350
## 102 293 37 0.02833 1.28e-03 0.02593 0.0309
## 103 183 21 0.02508 1.31e-03 0.02263 0.0278
## 104 101 5 0.02383 1.36e-03 0.02131 0.0267
## 105 45 1 0.02330 1.43e-03 0.02066 0.0263
## 106 4 3 0.00583 5.06e-03 0.00106 0.0319
## 107 1 1 0.00000 NaN NA NA
** Define a grouping variable ** # Marital Status is my grouping variable. My research hypothesis is that married women has higher mortality comapred to single women.
** Comparison of Kaplan-Meier survival across grouping variables in data.**
survdiff(Surv(death_time, d.event) ~ MARST,
data = nhis_dat)
## Call:
## survdiff(formula = Surv(death_time, d.event) ~ MARST, data = nhis_dat)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## MARST=married/ever married 508999 76984 65474 2023 9547
## MARST=unmarried 144352 6554 18064 7334 9547
##
## Chisq= 9547 on 1 degrees of freedom, p= <2e-16
** Plot the survival function **
group_fit <- survfit2(Surv(death_time, d.event) ~ MARST,
data = nhis_dat)
group_fit%>%
ggsurvfit() +
add_confidence_interval() +
add_risktable() +
labs(title = "Female mortality Survival Plot by Marital Status",
caption = "Source: NHIS 1990-2017 \n Calculations by Mahmuda Sultana", x= "time",
y = "Survival Probability")+ theme_minimal() +
theme(legend.position = "bottom",axis.line = element_line(linetype = "solid"),
axis.ticks = element_line(linetype = "blank"),
panel.grid.major = element_line(colour = "gray80"),
panel.grid.minor = element_line(colour = "gray94",
linetype = "blank"), axis.title = element_text(family = "serif",
face = "bold"), axis.text = element_text(face = "bold"),
plot.title = element_text(family = "serif",
size = 14, face = "bold") , legend.text = element_text(face = "bold",
family = "serif"), legend.title = element_text(face = "bold",
family = "serif"), panel.background = element_rect(fill = "gray97"),
legend.key = element_rect(fill = NA,
linetype = "solid"), legend.background = element_rect(linetype = "solid")) +
guides(color = guide_legend(ncol = 1)) +
coord_cartesian(xlim = c(0, 1)) +
scale_y_continuous(
limits = c(.99, 1),
labels = scales::percent,
expand = c(0.01, 0))+
scale_color_manual(values = c('#54738E', '#82AC7C')) +
scale_fill_manual(values = c('#54738E', '#82AC7C'))
## Warning: Removed 42 row(s) containing missing values (geom_path).