load library
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 2904526 155.2 5601877 299.2 NA 3999272 213.6
## Vcells 4873666 37.2 10146329 77.5 16384 6981735 53.3
library(ipumsr)
##
## Attaching package: 'ipumsr'
## The following objects are masked from 'package:sjlabelled':
##
## as_factor, zap_labels
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 2908488 155.4 5601877 299.2 NA 3999272 213.6
## Vcells 4877932 37.3 10146329 77.5 16384 6981735 53.3
setwd("~/Downloads")
Read the IPUMS NHIS data
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 2908484 155.4 5601877 299.2 NA 3999272 213.6
## Vcells 4878089 37.3 10146329 77.5 16384 6981735 53.3
ddi <- read_ipums_ddi("nhis_00008.xml")
data <- read_ipums_micro(ddi)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
data<- haven::zap_labels(data)
Filter data to only mortality eligible and young adult age
18-35
data <- data %>%
filter(MORTELIG == 1)
data <- data %>%
filter(AGE ==18)
data <- data%>%
filter(complete.cases(.))
1. Define your event variable
Answer: Young Adult mortality in the United State(Age 18-35),
This analysis uses NHIS data to see the mortality event among young
adult age 18-35 in the United state. Due to data limitation the data
used 2001-2018 variable as those who are eligible in 2001 will be
18years and their maximum age in 2018 will be 35years 2001-2018
2. Define a duration or time variable.
I would be considering time of young adult death
3. Define a censoring indicator
1 means dead, 0 means alive
4. Estimate the survival function for your outcome and plot it : Age
at Death
Cross tabulation of mortality status and year
table(data$MORTSTAT, data$YEAR)
##
## 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
## 1 27 26 23 21 23 16 6 8 22 15 9 7 12 3 1
## 2 1227 1087 1111 1166 1132 960 1019 1048 1259 1234 1285 1386 1260 1411 321
##
## 2016 2017 2018
## 1 2 0 0
## 2 396 306 204
data <- data %>%
mutate(death_age = ifelse( MORTSTAT ==1,
MORTDODY - (YEAR - AGE),
2018 - (YEAR - AGE)),
d.event = ifelse(MORTSTAT == 1, 1, 0))
library(survival)
age_fit <- survfit(Surv(death_age, d.event) ~ 1,
data = data)
library(ggsurvfit)
age_fit %>%
ggsurvfit() +
add_confidence_interval(type = "ribbon") +
add_quantile()

5a. Kaplan-Meier survival analysis of the outcome( time to
death)
Time to death
data <- data %>%
mutate(death_time = ifelse( MORTSTAT ==1,
MORTDODY - YEAR ,
2018 - YEAR ),
d5.event = ifelse(MORTSTAT == 1 & death_time <= 5, 1, 0))
library(survival)
time_fit <- survfit(Surv(death_time, d5.event) ~ 1,
conf.type = "log",
data = data )
time_fit %>%
ggsurvfit()+
xlim(-1, 6) +
add_censor_mark() +
add_confidence_interval()+
add_quantile(y_value = .975)
## Warning: Removed 12 row(s) containing missing values (geom_path).

5 b. Define a grouping variable, this can be dichotomous or
categorical
Gender is my grouping variable
5c Do you have a research hypothesis about the survival patterns for
the levels of the categorical variable? State it.
My Assumption is that males will have higher mortality compared to
male young adult(Age18-35)
#d.5d Comparison of Kaplan-Meier survival across grouping variables
in your data. Interpret your results.
# Testing the hypothesis to see if there is a significant difference in the Pharmacy survival chances between the two groups
survdiff(Surv(death_time, d.event) ~ SEX,
data = data)
## Call:
## survdiff(formula = Surv(death_time, d.event) ~ SEX, data = data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## SEX=1 9332 175 114 32.1 67
## SEX=2 8701 46 107 34.5 67
##
## Chisq= 67 on 1 degrees of freedom, p= 3e-16
The result shows that there is a significant difference between males
and females, as male was higher as hypothesized.
5e. Plot the survival function for the analysis for each level of
the group variable
# Estimating the survival function for the groups
group_fit <- survfit(Surv(death_time, d.event) ~ SEX,
data = data)
#summary(group_fit)
# Plotting the Survival functions for the two groups
group_fit%>%
ggsurvfit() +
add_confidence_interval() +
add_risktable() +
labs(title = "Young adult mortality Survival Plot by Gender",
caption = "Source: NHIS 2001-2018 \n Calculations by Joseph Jaiyeola", x= "time",
y = "Survival Probability")+ theme_minimal() +
theme(legend.position = "bottom",axis.line = element_line(linetype = "solid"),
axis.ticks = element_line(linetype = "blank"),
panel.grid.major = element_line(colour = "gray80"),
panel.grid.minor = element_line(colour = "gray94",
linetype = "blank"), axis.title = element_text(family = "serif",
face = "bold"), axis.text = element_text(face = "bold"),
plot.title = element_text(family = "serif",
size = 14, face = "bold") , legend.text = element_text(face = "bold",
family = "serif"), legend.title = element_text(face = "bold",
family = "serif"), panel.background = element_rect(fill = "gray97"),
legend.key = element_rect(fill = NA,
linetype = "solid"), legend.background = element_rect(linetype = "solid")) +
guides(color = guide_legend(ncol = 1)) +
coord_cartesian(xlim = c(0, 30)) +
scale_y_continuous(
limits = c(.95, 1),
labels = scales::percent,
expand = c(0.01, 0))+
scale_color_manual(values = c('#54738E', '#82AC7C')) +
scale_fill_manual(values = c('#54738E', '#82AC7C'))
## Warning: Removed 1 row(s) containing missing values (geom_path).

LS0tCnRpdGxlOiAiRXZlbnQgSGlzdG9yeSBBbmFseXNpcyBBc3MyIgphdXRob3I6ICJKb3NlcGggSmFpeWVvbGEiCmRhdGU6ICJgciBmb3JtYXQoU3lzLnRpbWUoKSwgJyVkICVCLCAlWScpYCIKb3V0cHV0OgogICBodG1sX2RvY3VtZW50OgogICAgZGZfcHJpbnQ6IHBhZ2VkCiAgICBmaWdfaGVpZ2h0OiA3CiAgICBmaWdfd2lkdGg6IDcKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwogICAgY29kZV9kb3dubG9hZDogdHJ1ZQotLS0KCiMjIyBsb2FkIGxpYnJhcnkKYGBge3IgaW5jbHVkZT1GQUxTRX0KZ2MoKQpwYWNrYWdlcyA8LSBjKCJ0aWR5dmVyc2UiLCAia25pdHIiLCAidGliYmxlIiwgImthYmxlRXh0cmEiLCAib2ZmaWNlciIsICJjb21wYXJlR3JvdXBzIiwgImNhciIsICJoYXZlbiIsICJmb3JlaWduIiwgInRpZHlyIiwgIm1hZ3JpdHRyIiwgImxhYmVsbGVkIiwgImdncGxvdDIiLCAiZm9yZWlnbiIsICJsbWU0IiwgImxtZXJUZXN0IiwgIk11TUluIiwgIk1BU1MiLCAiYXJtIiwgImxhdHRpY2UiLCAicHNjbCIsICJzdXJ2aXZhbCIsICJzdXJ2ZXkiLCAic3J2eXIiLCAic3Vydm1pbmVyIiwgImVmZmVjdHMiLCAiZ3QiLCAiZ3RzdW1tYXJ5IiwgImdsdWUiLCAiZmxleHRhYmxlIiwgImh1eHRhYmxlIiwgIm1vZGVsc3VtbWFyeSIsICJicm9vbSIsICJicm9vbS5taXhlZCIsICJsYXR0aWNlIiwgImNtcHJzayIsICJzdGFyZ2F6ZXIiLCAic2pzdGF0cyIsICJzalBsb3QiLCAic2ptaXNjIiwgInNqbGFiZWxsZWQiKQppbnZpc2libGUobGFwcGx5KHBhY2thZ2VzLCBmdW5jdGlvbih4KSByZXF1aXJlKHgsIGNoYXJhY3Rlci5vbmx5ID0gVCwgcXVpZXRseSA9IFQpKSkKYGBgCgpgYGB7cn0KZ2MoKQpsaWJyYXJ5KGlwdW1zcikKYGBgCgpgYGB7cn0KZ2MoKQpzZXR3ZCgifi9Eb3dubG9hZHMiKQpgYGAKCiMjIyBSZWFkIHRoZSBJUFVNUyBOSElTIGRhdGEKYGBge3J9CmdjKCkKZGRpIDwtIHJlYWRfaXB1bXNfZGRpKCJuaGlzXzAwMDA4LnhtbCIpCmRhdGEgPC0gcmVhZF9pcHVtc19taWNybyhkZGkpCmRhdGE8LSBoYXZlbjo6emFwX2xhYmVscyhkYXRhKQpgYGAKCgojIyMgRmlsdGVyIGRhdGEgdG8gb25seSBtb3J0YWxpdHkgZWxpZ2libGUgYW5kIHlvdW5nIGFkdWx0IGFnZSAxOC0zNQpgYGB7cn0KZGF0YSA8LSBkYXRhICU+JQogIGZpbHRlcihNT1JURUxJRyA9PSAxKQoKZGF0YSA8LSBkYXRhICU+JQpmaWx0ZXIoQUdFID09MTgpCgpkYXRhIDwtIGRhdGElPiUKZmlsdGVyKGNvbXBsZXRlLmNhc2VzKC4pKQoKYGBgCgoKIyAxLglEZWZpbmUgeW91ciBldmVudCB2YXJpYWJsZSAKQW5zd2VyOiBZb3VuZyBBZHVsdCBtb3J0YWxpdHkgaW4gdGhlIFVuaXRlZCBTdGF0ZShBZ2UgMTgtMzUpLAoKVGhpcyBhbmFseXNpcyB1c2VzIE5ISVMgZGF0YSB0byBzZWUgdGhlIG1vcnRhbGl0eSBldmVudCBhbW9uZyB5b3VuZyBhZHVsdCBhZ2UgMTgtMzUgaW4gdGhlIFVuaXRlZCBzdGF0ZS4gRHVlIHRvIGRhdGEgbGltaXRhdGlvbiB0aGUgZGF0YSB1c2VkIDIwMDEtMjAxOCB2YXJpYWJsZSBhcyB0aG9zZSB3aG8gYXJlIGVsaWdpYmxlIGluIDIwMDEgd2lsbCBiZSAxOHllYXJzIGFuZCB0aGVpciBtYXhpbXVtIGFnZSBpbiAyMDE4IHdpbGwgYmUgMzV5ZWFycwoyMDAxLTIwMTgKCgojIDIuIERlZmluZSBhIGR1cmF0aW9uIG9yIHRpbWUgdmFyaWFibGUuCkkgd291bGQgYmUgY29uc2lkZXJpbmcgdGltZSBvZiB5b3VuZyBhZHVsdCBkZWF0aAoKCgojIDMuCURlZmluZSBhIGNlbnNvcmluZyBpbmRpY2F0b3IgCjEgbWVhbnMgZGVhZCwgMCBtZWFucyBhbGl2ZQoKCgoKIyA0LglFc3RpbWF0ZSB0aGUgc3Vydml2YWwgZnVuY3Rpb24gZm9yIHlvdXIgb3V0Y29tZSBhbmQgcGxvdCBpdCA6IEFnZSBhdCBEZWF0aAoKIyMgQ3Jvc3MgdGFidWxhdGlvbiBvZiBtb3J0YWxpdHkgc3RhdHVzIGFuZCB5ZWFyCmBgYHtyfQp0YWJsZShkYXRhJE1PUlRTVEFULCBkYXRhJFlFQVIpCmBgYAoKYGBge3J9CmRhdGEgPC0gZGF0YSAlPiUKICBtdXRhdGUoZGVhdGhfYWdlID0gaWZlbHNlKCBNT1JUU1RBVCA9PTEsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIE1PUlRET0RZIC0gKFlFQVIgLSBBR0UpLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAyMDE4IC0gKFlFQVIgLSBBR0UpKSwgCiAgICAgICAgIGQuZXZlbnQgPSBpZmVsc2UoTU9SVFNUQVQgPT0gMSwgMSwgMCkpCgoKbGlicmFyeShzdXJ2aXZhbCkKCmFnZV9maXQgPC0gc3VydmZpdChTdXJ2KGRlYXRoX2FnZSwgZC5ldmVudCkgfiAxLCAKICAgICAgICAgICAgICAgICAgIGRhdGEgPSBkYXRhKQoKbGlicmFyeShnZ3N1cnZmaXQpCgphZ2VfZml0ICU+JQogIGdnc3VydmZpdCgpICsKICBhZGRfY29uZmlkZW5jZV9pbnRlcnZhbCh0eXBlID0gInJpYmJvbiIpICsKICBhZGRfcXVhbnRpbGUoKSAKYGBgCgoKCgoKIyA1YS4JS2FwbGFuLU1laWVyIHN1cnZpdmFsIGFuYWx5c2lzIG9mIHRoZSBvdXRjb21lKCB0aW1lIHRvIGRlYXRoKQojIFRpbWUgdG8gZGVhdGgKCmBgYHtyfQpkYXRhIDwtIGRhdGEgICU+JQogIG11dGF0ZShkZWF0aF90aW1lID0gaWZlbHNlKCBNT1JUU1RBVCA9PTEsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIE1PUlRET0RZIC0gWUVBUiAsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIDIwMTggLSBZRUFSICksIAogICAgICAgICBkNS5ldmVudCA9IGlmZWxzZShNT1JUU1RBVCA9PSAxICYgZGVhdGhfdGltZSA8PSA1LCAxLCAwKSkKCmxpYnJhcnkoc3Vydml2YWwpCgp0aW1lX2ZpdCA8LSBzdXJ2Zml0KFN1cnYoZGVhdGhfdGltZSwgZDUuZXZlbnQpIH4gMSwgCiAgICAgICAgICAgICAgICAgIGNvbmYudHlwZSA9ICJsb2ciLAogICAgICAgICAgICAgICAgICAgZGF0YSA9IGRhdGEgKQoKCnRpbWVfZml0ICU+JQogIGdnc3VydmZpdCgpKwogIHhsaW0oLTEsIDYpICsKICBhZGRfY2Vuc29yX21hcmsoKSArCiAgYWRkX2NvbmZpZGVuY2VfaW50ZXJ2YWwoKSsKICBhZGRfcXVhbnRpbGUoeV92YWx1ZSA9IC45NzUpIApgYGAKCgojIDUgYi4JRGVmaW5lIGEgZ3JvdXBpbmcgdmFyaWFibGUsIHRoaXMgY2FuIGJlIGRpY2hvdG9tb3VzIG9yIGNhdGVnb3JpY2FsCkdlbmRlciBpcyBteSBncm91cGluZyB2YXJpYWJsZQoKIyA1YyBEbyB5b3UgaGF2ZSBhIHJlc2VhcmNoIGh5cG90aGVzaXMgYWJvdXQgdGhlIHN1cnZpdmFsIHBhdHRlcm5zIGZvciB0aGUgbGV2ZWxzIG9mIHRoZSBjYXRlZ29yaWNhbCB2YXJpYWJsZT8gIFN0YXRlIGl0LgpNeSBBc3N1bXB0aW9uIGlzIHRoYXQgbWFsZXMgd2lsbCBoYXZlIGhpZ2hlciBtb3J0YWxpdHkgY29tcGFyZWQgdG8gbWFsZSB5b3VuZyBhZHVsdChBZ2UxOC0zNSkKCgoKCiNkLjVkIAlDb21wYXJpc29uIG9mIEthcGxhbi1NZWllciBzdXJ2aXZhbCBhY3Jvc3MgZ3JvdXBpbmcgdmFyaWFibGVzIGluIHlvdXIgZGF0YS4gSW50ZXJwcmV0IHlvdXIgcmVzdWx0cy4KYGBge3J9CiMgVGVzdGluZyB0aGUgaHlwb3RoZXNpcyB0byBzZWUgaWYgdGhlcmUgaXMgYSBzaWduaWZpY2FudCBkaWZmZXJlbmNlIGluIHRoZSBQaGFybWFjeSBzdXJ2aXZhbCBjaGFuY2VzIGJldHdlZW4gdGhlIHR3byBncm91cHMgCgpzdXJ2ZGlmZihTdXJ2KGRlYXRoX3RpbWUsIGQuZXZlbnQpIH4gU0VYLCAKICAgICAgICAgICAgICAgICAgIGRhdGEgPSBkYXRhKQpgYGAKCgpUaGUgcmVzdWx0IHNob3dzIHRoYXQgdGhlcmUgaXMgYSBzaWduaWZpY2FudCBkaWZmZXJlbmNlIGJldHdlZW4gbWFsZXMgYW5kIGZlbWFsZXMsIGFzIG1hbGUgd2FzIGhpZ2hlciBhcyBoeXBvdGhlc2l6ZWQuIAoKCiMgNWUuCVBsb3QgdGhlIHN1cnZpdmFsIGZ1bmN0aW9uIGZvciB0aGUgYW5hbHlzaXMgZm9yIGVhY2ggbGV2ZWwgb2YgdGhlIGdyb3VwIHZhcmlhYmxlCgpgYGB7cn0KIyBFc3RpbWF0aW5nIHRoZSBzdXJ2aXZhbCBmdW5jdGlvbiBmb3IgdGhlIGdyb3Vwcwpncm91cF9maXQgPC0gc3VydmZpdChTdXJ2KGRlYXRoX3RpbWUsIGQuZXZlbnQpIH4gU0VYLCAKICAgICAgICAgICAgICAgICAgIGRhdGEgPSBkYXRhKQoKI3N1bW1hcnkoZ3JvdXBfZml0KQojIFBsb3R0aW5nIHRoZSBTdXJ2aXZhbCBmdW5jdGlvbnMgZm9yIHRoZSB0d28gZ3JvdXBzCmdyb3VwX2ZpdCU+JQogIGdnc3VydmZpdCgpICsKICAgYWRkX2NvbmZpZGVuY2VfaW50ZXJ2YWwoKSArCiAgIGFkZF9yaXNrdGFibGUoKSArCiAgbGFicyh0aXRsZSA9ICJZb3VuZyBhZHVsdCBtb3J0YWxpdHkgU3Vydml2YWwgUGxvdCBieSBHZW5kZXIiLAogICAgICAgY2FwdGlvbiA9ICJTb3VyY2U6IE5ISVMgMjAwMS0yMDE4IFxuIENhbGN1bGF0aW9ucyBieSBKb3NlcGggSmFpeWVvbGEiLCB4PSAidGltZSIsCiAgICAgIHkgPSAiU3Vydml2YWwgUHJvYmFiaWxpdHkiKSsgIHRoZW1lX21pbmltYWwoKSArCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gImJvdHRvbSIsYXhpcy5saW5lID0gZWxlbWVudF9saW5lKGxpbmV0eXBlID0gInNvbGlkIiksCiAgICBheGlzLnRpY2tzID0gZWxlbWVudF9saW5lKGxpbmV0eXBlID0gImJsYW5rIiksCiAgICBwYW5lbC5ncmlkLm1ham9yID0gZWxlbWVudF9saW5lKGNvbG91ciA9ICJncmF5ODAiKSwKICAgIHBhbmVsLmdyaWQubWlub3IgPSBlbGVtZW50X2xpbmUoY29sb3VyID0gImdyYXk5NCIsCiAgICAgICAgbGluZXR5cGUgPSAiYmxhbmsiKSwgYXhpcy50aXRsZSA9IGVsZW1lbnRfdGV4dChmYW1pbHkgPSAic2VyaWYiLAogICAgICAgIGZhY2UgPSAiYm9sZCIpLCBheGlzLnRleHQgPSBlbGVtZW50X3RleHQoZmFjZSA9ICJib2xkIiksCiAgICBwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KGZhbWlseSA9ICJzZXJpZiIsCiAgICAgICAgc2l6ZSA9IDE0LCBmYWNlID0gImJvbGQiKSAsIGxlZ2VuZC50ZXh0ID0gZWxlbWVudF90ZXh0KGZhY2UgPSAiYm9sZCIsCiAgICAgICAgZmFtaWx5ID0gInNlcmlmIiksIGxlZ2VuZC50aXRsZSA9IGVsZW1lbnRfdGV4dChmYWNlID0gImJvbGQiLAogICAgICAgIGZhbWlseSA9ICJzZXJpZiIpLCBwYW5lbC5iYWNrZ3JvdW5kID0gZWxlbWVudF9yZWN0KGZpbGwgPSAiZ3JheTk3IiksCiAgICBsZWdlbmQua2V5ID0gZWxlbWVudF9yZWN0KGZpbGwgPSBOQSwKICAgICAgICBsaW5ldHlwZSA9ICJzb2xpZCIpLCBsZWdlbmQuYmFja2dyb3VuZCA9IGVsZW1lbnRfcmVjdChsaW5ldHlwZSA9ICJzb2xpZCIpKSArCiAgZ3VpZGVzKGNvbG9yID0gZ3VpZGVfbGVnZW5kKG5jb2wgPSAxKSkgKwogICAgY29vcmRfY2FydGVzaWFuKHhsaW0gPSBjKDAsIDMwKSkgKwogIHNjYWxlX3lfY29udGludW91cygKICAgIGxpbWl0cyA9IGMoLjk1LCAxKSwKICAgIGxhYmVscyA9IHNjYWxlczo6cGVyY2VudCwgCiAgICBleHBhbmQgPSBjKDAuMDEsIDApKSsKICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzID0gYygnIzU0NzM4RScsICcjODJBQzdDJykpICsKICBzY2FsZV9maWxsX21hbnVhbCh2YWx1ZXMgPSBjKCcjNTQ3MzhFJywgJyM4MkFDN0MnKSkgCmBgYAoKCgoK