load library

gc()
##           used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells 2904526 155.2    5601877 299.2         NA  3999272 213.6
## Vcells 4873666  37.2   10146329  77.5      16384  6981735  53.3
library(ipumsr)
## 
## Attaching package: 'ipumsr'
## The following objects are masked from 'package:sjlabelled':
## 
##     as_factor, zap_labels
gc()
##           used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells 2908488 155.4    5601877 299.2         NA  3999272 213.6
## Vcells 4877932  37.3   10146329  77.5      16384  6981735  53.3
setwd("~/Downloads")

Read the IPUMS NHIS data

gc()
##           used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells 2908484 155.4    5601877 299.2         NA  3999272 213.6
## Vcells 4878089  37.3   10146329  77.5      16384  6981735  53.3
ddi <- read_ipums_ddi("nhis_00008.xml")
data <- read_ipums_micro(ddi)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
data<- haven::zap_labels(data)

Filter data to only mortality eligible and young adult age 18-35

data <- data %>%
  filter(MORTELIG == 1)

data <- data %>%
filter(AGE ==18)

data <- data%>%
filter(complete.cases(.))

1. Define your event variable

Answer: Young Adult mortality in the United State(Age 18-35),

This analysis uses NHIS data to see the mortality event among young adult age 18-35 in the United state. Due to data limitation the data used 2001-2018 variable as those who are eligible in 2001 will be 18years and their maximum age in 2018 will be 35years 2001-2018

2. Define a duration or time variable.

I would be considering time of young adult death

3. Define a censoring indicator

1 means dead, 0 means alive

4. Estimate the survival function for your outcome and plot it : Age at Death

Cross tabulation of mortality status and year

table(data$MORTSTAT, data$YEAR)
##    
##     2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
##   1   27   26   23   21   23   16    6    8   22   15    9    7   12    3    1
##   2 1227 1087 1111 1166 1132  960 1019 1048 1259 1234 1285 1386 1260 1411  321
##    
##     2016 2017 2018
##   1    2    0    0
##   2  396  306  204
data <- data %>%
  mutate(death_age = ifelse( MORTSTAT ==1, 
                             MORTDODY - (YEAR - AGE), 
                             2018 - (YEAR - AGE)), 
         d.event = ifelse(MORTSTAT == 1, 1, 0))


library(survival)

age_fit <- survfit(Surv(death_age, d.event) ~ 1, 
                   data = data)

library(ggsurvfit)

age_fit %>%
  ggsurvfit() +
  add_confidence_interval(type = "ribbon") +
  add_quantile() 

5a. Kaplan-Meier survival analysis of the outcome( time to death)

Time to death

data <- data  %>%
  mutate(death_time = ifelse( MORTSTAT ==1, 
                             MORTDODY - YEAR , 
                             2018 - YEAR ), 
         d5.event = ifelse(MORTSTAT == 1 & death_time <= 5, 1, 0))

library(survival)

time_fit <- survfit(Surv(death_time, d5.event) ~ 1, 
                  conf.type = "log",
                   data = data )


time_fit %>%
  ggsurvfit()+
  xlim(-1, 6) +
  add_censor_mark() +
  add_confidence_interval()+
  add_quantile(y_value = .975) 
## Warning: Removed 12 row(s) containing missing values (geom_path).

5 b. Define a grouping variable, this can be dichotomous or categorical

Gender is my grouping variable

5c Do you have a research hypothesis about the survival patterns for the levels of the categorical variable? State it.

My Assumption is that males will have higher mortality compared to male young adult(Age18-35)

#d.5d Comparison of Kaplan-Meier survival across grouping variables in your data. Interpret your results.

# Testing the hypothesis to see if there is a significant difference in the Pharmacy survival chances between the two groups 

survdiff(Surv(death_time, d.event) ~ SEX, 
                   data = data)
## Call:
## survdiff(formula = Surv(death_time, d.event) ~ SEX, data = data)
## 
##          N Observed Expected (O-E)^2/E (O-E)^2/V
## SEX=1 9332      175      114      32.1        67
## SEX=2 8701       46      107      34.5        67
## 
##  Chisq= 67  on 1 degrees of freedom, p= 3e-16

The result shows that there is a significant difference between males and females, as male was higher as hypothesized.

5e. Plot the survival function for the analysis for each level of the group variable

# Estimating the survival function for the groups
group_fit <- survfit(Surv(death_time, d.event) ~ SEX, 
                   data = data)

#summary(group_fit)
# Plotting the Survival functions for the two groups
group_fit%>%
  ggsurvfit() +
   add_confidence_interval() +
   add_risktable() +
  labs(title = "Young adult mortality Survival Plot by Gender",
       caption = "Source: NHIS 2001-2018 \n Calculations by Joseph Jaiyeola", x= "time",
      y = "Survival Probability")+  theme_minimal() +
  theme(legend.position = "bottom",axis.line = element_line(linetype = "solid"),
    axis.ticks = element_line(linetype = "blank"),
    panel.grid.major = element_line(colour = "gray80"),
    panel.grid.minor = element_line(colour = "gray94",
        linetype = "blank"), axis.title = element_text(family = "serif",
        face = "bold"), axis.text = element_text(face = "bold"),
    plot.title = element_text(family = "serif",
        size = 14, face = "bold") , legend.text = element_text(face = "bold",
        family = "serif"), legend.title = element_text(face = "bold",
        family = "serif"), panel.background = element_rect(fill = "gray97"),
    legend.key = element_rect(fill = NA,
        linetype = "solid"), legend.background = element_rect(linetype = "solid")) +
  guides(color = guide_legend(ncol = 1)) +
    coord_cartesian(xlim = c(0, 30)) +
  scale_y_continuous(
    limits = c(.95, 1),
    labels = scales::percent, 
    expand = c(0.01, 0))+
  scale_color_manual(values = c('#54738E', '#82AC7C')) +
  scale_fill_manual(values = c('#54738E', '#82AC7C')) 
## Warning: Removed 1 row(s) containing missing values (geom_path).

LS0tCnRpdGxlOiAiRXZlbnQgSGlzdG9yeSBBbmFseXNpcyBBc3MyIgphdXRob3I6ICJKb3NlcGggSmFpeWVvbGEiCmRhdGU6ICJgciBmb3JtYXQoU3lzLnRpbWUoKSwgJyVkICVCLCAlWScpYCIKb3V0cHV0OgogICBodG1sX2RvY3VtZW50OgogICAgZGZfcHJpbnQ6IHBhZ2VkCiAgICBmaWdfaGVpZ2h0OiA3CiAgICBmaWdfd2lkdGg6IDcKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwogICAgY29kZV9kb3dubG9hZDogdHJ1ZQotLS0KCiMjIyBsb2FkIGxpYnJhcnkKYGBge3IgaW5jbHVkZT1GQUxTRX0KZ2MoKQpwYWNrYWdlcyA8LSBjKCJ0aWR5dmVyc2UiLCAia25pdHIiLCAidGliYmxlIiwgImthYmxlRXh0cmEiLCAib2ZmaWNlciIsICJjb21wYXJlR3JvdXBzIiwgImNhciIsICJoYXZlbiIsICJmb3JlaWduIiwgInRpZHlyIiwgIm1hZ3JpdHRyIiwgImxhYmVsbGVkIiwgImdncGxvdDIiLCAiZm9yZWlnbiIsICJsbWU0IiwgImxtZXJUZXN0IiwgIk11TUluIiwgIk1BU1MiLCAiYXJtIiwgImxhdHRpY2UiLCAicHNjbCIsICJzdXJ2aXZhbCIsICJzdXJ2ZXkiLCAic3J2eXIiLCAic3Vydm1pbmVyIiwgImVmZmVjdHMiLCAiZ3QiLCAiZ3RzdW1tYXJ5IiwgImdsdWUiLCAiZmxleHRhYmxlIiwgImh1eHRhYmxlIiwgIm1vZGVsc3VtbWFyeSIsICJicm9vbSIsICJicm9vbS5taXhlZCIsICJsYXR0aWNlIiwgImNtcHJzayIsICJzdGFyZ2F6ZXIiLCAic2pzdGF0cyIsICJzalBsb3QiLCAic2ptaXNjIiwgInNqbGFiZWxsZWQiKQppbnZpc2libGUobGFwcGx5KHBhY2thZ2VzLCBmdW5jdGlvbih4KSByZXF1aXJlKHgsIGNoYXJhY3Rlci5vbmx5ID0gVCwgcXVpZXRseSA9IFQpKSkKYGBgCgpgYGB7cn0KZ2MoKQpsaWJyYXJ5KGlwdW1zcikKYGBgCgpgYGB7cn0KZ2MoKQpzZXR3ZCgifi9Eb3dubG9hZHMiKQpgYGAKCiMjIyBSZWFkIHRoZSBJUFVNUyBOSElTIGRhdGEKYGBge3J9CmdjKCkKZGRpIDwtIHJlYWRfaXB1bXNfZGRpKCJuaGlzXzAwMDA4LnhtbCIpCmRhdGEgPC0gcmVhZF9pcHVtc19taWNybyhkZGkpCmRhdGE8LSBoYXZlbjo6emFwX2xhYmVscyhkYXRhKQpgYGAKCgojIyMgRmlsdGVyIGRhdGEgdG8gb25seSBtb3J0YWxpdHkgZWxpZ2libGUgYW5kIHlvdW5nIGFkdWx0IGFnZSAxOC0zNQpgYGB7cn0KZGF0YSA8LSBkYXRhICU+JQogIGZpbHRlcihNT1JURUxJRyA9PSAxKQoKZGF0YSA8LSBkYXRhICU+JQpmaWx0ZXIoQUdFID09MTgpCgpkYXRhIDwtIGRhdGElPiUKZmlsdGVyKGNvbXBsZXRlLmNhc2VzKC4pKQoKYGBgCgoKIyAxLglEZWZpbmUgeW91ciBldmVudCB2YXJpYWJsZSAKQW5zd2VyOiBZb3VuZyBBZHVsdCBtb3J0YWxpdHkgaW4gdGhlIFVuaXRlZCBTdGF0ZShBZ2UgMTgtMzUpLAoKVGhpcyBhbmFseXNpcyB1c2VzIE5ISVMgZGF0YSB0byBzZWUgdGhlIG1vcnRhbGl0eSBldmVudCBhbW9uZyB5b3VuZyBhZHVsdCBhZ2UgMTgtMzUgaW4gdGhlIFVuaXRlZCBzdGF0ZS4gRHVlIHRvIGRhdGEgbGltaXRhdGlvbiB0aGUgZGF0YSB1c2VkIDIwMDEtMjAxOCB2YXJpYWJsZSBhcyB0aG9zZSB3aG8gYXJlIGVsaWdpYmxlIGluIDIwMDEgd2lsbCBiZSAxOHllYXJzIGFuZCB0aGVpciBtYXhpbXVtIGFnZSBpbiAyMDE4IHdpbGwgYmUgMzV5ZWFycwoyMDAxLTIwMTgKCgojIDIuIERlZmluZSBhIGR1cmF0aW9uIG9yIHRpbWUgdmFyaWFibGUuCkkgd291bGQgYmUgY29uc2lkZXJpbmcgdGltZSBvZiB5b3VuZyBhZHVsdCBkZWF0aAoKCgojIDMuCURlZmluZSBhIGNlbnNvcmluZyBpbmRpY2F0b3IgCjEgbWVhbnMgZGVhZCwgMCBtZWFucyBhbGl2ZQoKCgoKIyA0LglFc3RpbWF0ZSB0aGUgc3Vydml2YWwgZnVuY3Rpb24gZm9yIHlvdXIgb3V0Y29tZSBhbmQgcGxvdCBpdCA6IEFnZSBhdCBEZWF0aAoKIyMgQ3Jvc3MgdGFidWxhdGlvbiBvZiBtb3J0YWxpdHkgc3RhdHVzIGFuZCB5ZWFyCmBgYHtyfQp0YWJsZShkYXRhJE1PUlRTVEFULCBkYXRhJFlFQVIpCmBgYAoKYGBge3J9CmRhdGEgPC0gZGF0YSAlPiUKICBtdXRhdGUoZGVhdGhfYWdlID0gaWZlbHNlKCBNT1JUU1RBVCA9PTEsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIE1PUlRET0RZIC0gKFlFQVIgLSBBR0UpLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAyMDE4IC0gKFlFQVIgLSBBR0UpKSwgCiAgICAgICAgIGQuZXZlbnQgPSBpZmVsc2UoTU9SVFNUQVQgPT0gMSwgMSwgMCkpCgoKbGlicmFyeShzdXJ2aXZhbCkKCmFnZV9maXQgPC0gc3VydmZpdChTdXJ2KGRlYXRoX2FnZSwgZC5ldmVudCkgfiAxLCAKICAgICAgICAgICAgICAgICAgIGRhdGEgPSBkYXRhKQoKbGlicmFyeShnZ3N1cnZmaXQpCgphZ2VfZml0ICU+JQogIGdnc3VydmZpdCgpICsKICBhZGRfY29uZmlkZW5jZV9pbnRlcnZhbCh0eXBlID0gInJpYmJvbiIpICsKICBhZGRfcXVhbnRpbGUoKSAKYGBgCgoKCgoKIyA1YS4JS2FwbGFuLU1laWVyIHN1cnZpdmFsIGFuYWx5c2lzIG9mIHRoZSBvdXRjb21lKCB0aW1lIHRvIGRlYXRoKQojIFRpbWUgdG8gZGVhdGgKCmBgYHtyfQpkYXRhIDwtIGRhdGEgICU+JQogIG11dGF0ZShkZWF0aF90aW1lID0gaWZlbHNlKCBNT1JUU1RBVCA9PTEsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIE1PUlRET0RZIC0gWUVBUiAsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIDIwMTggLSBZRUFSICksIAogICAgICAgICBkNS5ldmVudCA9IGlmZWxzZShNT1JUU1RBVCA9PSAxICYgZGVhdGhfdGltZSA8PSA1LCAxLCAwKSkKCmxpYnJhcnkoc3Vydml2YWwpCgp0aW1lX2ZpdCA8LSBzdXJ2Zml0KFN1cnYoZGVhdGhfdGltZSwgZDUuZXZlbnQpIH4gMSwgCiAgICAgICAgICAgICAgICAgIGNvbmYudHlwZSA9ICJsb2ciLAogICAgICAgICAgICAgICAgICAgZGF0YSA9IGRhdGEgKQoKCnRpbWVfZml0ICU+JQogIGdnc3VydmZpdCgpKwogIHhsaW0oLTEsIDYpICsKICBhZGRfY2Vuc29yX21hcmsoKSArCiAgYWRkX2NvbmZpZGVuY2VfaW50ZXJ2YWwoKSsKICBhZGRfcXVhbnRpbGUoeV92YWx1ZSA9IC45NzUpIApgYGAKCgojIDUgYi4JRGVmaW5lIGEgZ3JvdXBpbmcgdmFyaWFibGUsIHRoaXMgY2FuIGJlIGRpY2hvdG9tb3VzIG9yIGNhdGVnb3JpY2FsCkdlbmRlciBpcyBteSBncm91cGluZyB2YXJpYWJsZQoKIyA1YyBEbyB5b3UgaGF2ZSBhIHJlc2VhcmNoIGh5cG90aGVzaXMgYWJvdXQgdGhlIHN1cnZpdmFsIHBhdHRlcm5zIGZvciB0aGUgbGV2ZWxzIG9mIHRoZSBjYXRlZ29yaWNhbCB2YXJpYWJsZT8gIFN0YXRlIGl0LgpNeSBBc3N1bXB0aW9uIGlzIHRoYXQgbWFsZXMgd2lsbCBoYXZlIGhpZ2hlciBtb3J0YWxpdHkgY29tcGFyZWQgdG8gbWFsZSB5b3VuZyBhZHVsdChBZ2UxOC0zNSkKCgoKCiNkLjVkIAlDb21wYXJpc29uIG9mIEthcGxhbi1NZWllciBzdXJ2aXZhbCBhY3Jvc3MgZ3JvdXBpbmcgdmFyaWFibGVzIGluIHlvdXIgZGF0YS4gSW50ZXJwcmV0IHlvdXIgcmVzdWx0cy4KYGBge3J9CiMgVGVzdGluZyB0aGUgaHlwb3RoZXNpcyB0byBzZWUgaWYgdGhlcmUgaXMgYSBzaWduaWZpY2FudCBkaWZmZXJlbmNlIGluIHRoZSBQaGFybWFjeSBzdXJ2aXZhbCBjaGFuY2VzIGJldHdlZW4gdGhlIHR3byBncm91cHMgCgpzdXJ2ZGlmZihTdXJ2KGRlYXRoX3RpbWUsIGQuZXZlbnQpIH4gU0VYLCAKICAgICAgICAgICAgICAgICAgIGRhdGEgPSBkYXRhKQpgYGAKCgpUaGUgcmVzdWx0IHNob3dzIHRoYXQgdGhlcmUgaXMgYSBzaWduaWZpY2FudCBkaWZmZXJlbmNlIGJldHdlZW4gbWFsZXMgYW5kIGZlbWFsZXMsIGFzIG1hbGUgd2FzIGhpZ2hlciBhcyBoeXBvdGhlc2l6ZWQuIAoKCiMgNWUuCVBsb3QgdGhlIHN1cnZpdmFsIGZ1bmN0aW9uIGZvciB0aGUgYW5hbHlzaXMgZm9yIGVhY2ggbGV2ZWwgb2YgdGhlIGdyb3VwIHZhcmlhYmxlCgpgYGB7cn0KIyBFc3RpbWF0aW5nIHRoZSBzdXJ2aXZhbCBmdW5jdGlvbiBmb3IgdGhlIGdyb3Vwcwpncm91cF9maXQgPC0gc3VydmZpdChTdXJ2KGRlYXRoX3RpbWUsIGQuZXZlbnQpIH4gU0VYLCAKICAgICAgICAgICAgICAgICAgIGRhdGEgPSBkYXRhKQoKI3N1bW1hcnkoZ3JvdXBfZml0KQojIFBsb3R0aW5nIHRoZSBTdXJ2aXZhbCBmdW5jdGlvbnMgZm9yIHRoZSB0d28gZ3JvdXBzCmdyb3VwX2ZpdCU+JQogIGdnc3VydmZpdCgpICsKICAgYWRkX2NvbmZpZGVuY2VfaW50ZXJ2YWwoKSArCiAgIGFkZF9yaXNrdGFibGUoKSArCiAgbGFicyh0aXRsZSA9ICJZb3VuZyBhZHVsdCBtb3J0YWxpdHkgU3Vydml2YWwgUGxvdCBieSBHZW5kZXIiLAogICAgICAgY2FwdGlvbiA9ICJTb3VyY2U6IE5ISVMgMjAwMS0yMDE4IFxuIENhbGN1bGF0aW9ucyBieSBKb3NlcGggSmFpeWVvbGEiLCB4PSAidGltZSIsCiAgICAgIHkgPSAiU3Vydml2YWwgUHJvYmFiaWxpdHkiKSsgIHRoZW1lX21pbmltYWwoKSArCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gImJvdHRvbSIsYXhpcy5saW5lID0gZWxlbWVudF9saW5lKGxpbmV0eXBlID0gInNvbGlkIiksCiAgICBheGlzLnRpY2tzID0gZWxlbWVudF9saW5lKGxpbmV0eXBlID0gImJsYW5rIiksCiAgICBwYW5lbC5ncmlkLm1ham9yID0gZWxlbWVudF9saW5lKGNvbG91ciA9ICJncmF5ODAiKSwKICAgIHBhbmVsLmdyaWQubWlub3IgPSBlbGVtZW50X2xpbmUoY29sb3VyID0gImdyYXk5NCIsCiAgICAgICAgbGluZXR5cGUgPSAiYmxhbmsiKSwgYXhpcy50aXRsZSA9IGVsZW1lbnRfdGV4dChmYW1pbHkgPSAic2VyaWYiLAogICAgICAgIGZhY2UgPSAiYm9sZCIpLCBheGlzLnRleHQgPSBlbGVtZW50X3RleHQoZmFjZSA9ICJib2xkIiksCiAgICBwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KGZhbWlseSA9ICJzZXJpZiIsCiAgICAgICAgc2l6ZSA9IDE0LCBmYWNlID0gImJvbGQiKSAsIGxlZ2VuZC50ZXh0ID0gZWxlbWVudF90ZXh0KGZhY2UgPSAiYm9sZCIsCiAgICAgICAgZmFtaWx5ID0gInNlcmlmIiksIGxlZ2VuZC50aXRsZSA9IGVsZW1lbnRfdGV4dChmYWNlID0gImJvbGQiLAogICAgICAgIGZhbWlseSA9ICJzZXJpZiIpLCBwYW5lbC5iYWNrZ3JvdW5kID0gZWxlbWVudF9yZWN0KGZpbGwgPSAiZ3JheTk3IiksCiAgICBsZWdlbmQua2V5ID0gZWxlbWVudF9yZWN0KGZpbGwgPSBOQSwKICAgICAgICBsaW5ldHlwZSA9ICJzb2xpZCIpLCBsZWdlbmQuYmFja2dyb3VuZCA9IGVsZW1lbnRfcmVjdChsaW5ldHlwZSA9ICJzb2xpZCIpKSArCiAgZ3VpZGVzKGNvbG9yID0gZ3VpZGVfbGVnZW5kKG5jb2wgPSAxKSkgKwogICAgY29vcmRfY2FydGVzaWFuKHhsaW0gPSBjKDAsIDMwKSkgKwogIHNjYWxlX3lfY29udGludW91cygKICAgIGxpbWl0cyA9IGMoLjk1LCAxKSwKICAgIGxhYmVscyA9IHNjYWxlczo6cGVyY2VudCwgCiAgICBleHBhbmQgPSBjKDAuMDEsIDApKSsKICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzID0gYygnIzU0NzM4RScsICcjODJBQzdDJykpICsKICBzY2FsZV9maWxsX21hbnVhbCh2YWx1ZXMgPSBjKCcjNTQ3MzhFJywgJyM4MkFDN0MnKSkgCmBgYAoKCgoK