library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ipumsr)
library(haven)
library(survival)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(survminer)
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
##
## The following object is masked from 'package:survival':
##
## myeloma
library(ggplot2)
library(ggpubr)
library(muhaz)
library(carData)
library(dplyr)
library(eha)
** Read IPUMS NHIS data ** # I select data from 1990 to 2017
nhis <- read_ipums_ddi("nhis_00008.xml")
nhis_dat <- read_ipums_micro(nhis)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
nhis_dat <- haven::zap_labels(nhis_dat)
** Recode variable **
nhis_dat$MARST<- as.numeric(nhis_dat$MARST)
nhis_dat$MARST <- car:: Recode(nhis_dat$MARST, recodes= "11:40= 'married/ever married'; 50= 'unmarried'; else= NA", as.factor= T)
nhis_dat$EDUC <- car:: Recode(nhis_dat$EDUC, recodes= "100:116= 'LHS'; 201= 'High School'; 400='Bachelor'; 500:503='Graduate'; else= NA", as.factor= T)
nhis_dat <- nhis_dat %>%
filter(MORTELIG == 1)
nhis_dat <- nhis_dat %>%
filter(SEX == 2)
nhis_dat <- nhis_dat%>%
filter(complete.cases(.))
1) Carry out the following analysis:
#Define your outcome as in HW 1.Also consider what covariates are hypothesized to affect the outcome variable:
#The covariates hypothesis are Marital Status and Educational Attaintment.
#My research hypothesis is that married women has higher mortality comapred to single women. The higher the education attaintment, the lower the mortality rate.
nhis_dat<-nhis_dat %>%
mutate(death_age = ifelse( MORTSTAT ==1,
MORTDODY - YEAR ,
2017 - YEAR ),
d.event = ifelse(MORTSTAT == 1 & death_age <= 1, 1, 0))
library(survival)
time_fit <- survfit(Surv(death_age, d.event) ~ 1,
conf.type = "log",
data = nhis_dat)
library(ggsurvfit)
time_fit %>%
ggsurvfit()+
xlim(-1, 6) +
add_censor_mark() +
add_confidence_interval(type = "ribbon")+
add_quantile()
## Warning: Removed 16 row(s) containing missing values (geom_path).
#Fit the parametric model of your choosing to the data.
#Proportional Hazards Model (PH) because I want to see if the effect of the covariates increase or decrease the hazard by a proportionate amount of time.
#b.Justify what parametric distribution you choose
#Weibull because it is a flexible distribution. #c.Carry out model fit diagnostics for the model:
day<- 1/365
fit.wei<-phreg(Surv(death_age+day, d.event)~MARST+EDUC,
data=nhis_dat,
dist="weibull")
summary(fit.wei)
## Covariate Mean Coef Rel.Risk S.E. LR p
## MARST 0.0000
## married/ever mar 0.798 0 1 (reference)
## unmarried 0.202 -1.176 0.309 0.051
## EDUC 0.0000
## Bachelor 0.223 0 1 (reference)
## Graduate 0.104 -0.148 0.862 0.076
## High School 0.396 0.840 2.317 0.047
## LHS 0.277 1.286 3.618 0.046
##
## Events 5582
## Total time at risk 4296898
## Max. log. likelihood -32347
## LR test statistic 2015.52
## Degrees of freedom 4
## Overall p-value 0
plot(fit.wei)
** Test for an interaction between at least two of the predictors **
survdiff(Surv(death_age, d.event)~MARST+EDUC, data=nhis_dat)
## Call:
## survdiff(formula = Surv(death_age, d.event) ~ MARST + EDUC, data = nhis_dat)
##
## N Observed Expected (O-E)^2/E
## MARST=married/ever married, EDUC=Bachelor 79073 514 1009 242.6
## MARST=married/ever married, EDUC=Graduate 41160 224 523 171.0
## MARST=married/ever married, EDUC=High School 133856 2122 1718 95.0
## MARST=married/ever married, EDUC=LHS 93468 2309 1201 1022.6
## MARST=unmarried, EDUC=Bachelor 21490 56 273 172.6
## MARST=unmarried, EDUC=Graduate 8461 22 107 67.4
## MARST=unmarried, EDUC=High School 33399 158 429 171.0
## MARST=unmarried, EDUC=LHS 25048 177 323 65.8
## (O-E)^2/V
## MARST=married/ever married, EDUC=Bachelor 298.4
## MARST=married/ever married, EDUC=Graduate 190.1
## MARST=married/ever married, EDUC=High School 138.3
## MARST=married/ever married, EDUC=LHS 1313.0
## MARST=unmarried, EDUC=Bachelor 182.9
## MARST=unmarried, EDUC=Graduate 69.2
## MARST=unmarried, EDUC=High School 186.7
## MARST=unmarried, EDUC=LHS 70.4
##
## Chisq= 2024 on 7 degrees of freedom, p= <2e-16
** Interpretation ** # The p-value is less than .05, indicating there is significant relationship between Marital Status, Education Attaintment, and Female adult mortality. So, we can say that there is significant interactions between them.
** Tabular and Graphical Output **
fit.gom<-phreg(Surv(death_age, d.event)~MARST + EDUC,
data=nhis_dat,
dist="gompertz")
summary(fit.gom)
## Covariate Mean Coef Rel.Risk S.E. LR p
## MARST 0.0000
## married/ever mar 0.798 0 1 (reference)
## unmarried 0.202 -1.177 0.308 0.051
## EDUC 0.0000
## Bachelor 0.223 0 1 (reference)
## Graduate 0.104 -0.114 0.892 0.076
## High School 0.396 0.798 2.221 0.047
## LHS 0.277 1.260 3.526 0.046
##
## Events 5582
## Total time at risk 4295704
## Max. log. likelihood -41716
## LR test statistic 1926.21
## Degrees of freedom 4
## Overall p-value 0
plot(fit.gom)