EHA Cox Regression

library(haven)
library(tidyverse)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ stringr 1.4.1 
✔ readr   2.1.2      ✔ forcats 0.5.2 
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

library(car)

Loading required package: carData

Attaching package: 'car'

The following object is masked from 'package:dplyr':

    recode

The following object is masked from 'package:purrr':

    some

nhis <- read_stata("C:/Users/maman/OneDrive - University of Texas at San Antonio/Event History Analysis Data/nhis_00002.dta.gz")

nhis <- haven::zap_labels(nhis)

nhis <- nhis %>%
  filter(mortelig == 1)
#Creating covariates
nhis$sex1 <- Recode(nhis$sex,
                    recodes= "1='male'; 2='female'; else=NA")
nhis$cit <- Recode(nhis$citizen,
                      recodes= "1='no'; 2='yes'; else=NA")
nhis$obese <- Recode(nhis$bmicat,
                     recodes= "1:3='no'; 4='yes'; else=NA")
nhis$education <- Recode(nhis$educrec2,
                         recodes= "10:41='less than hs'; 42='hs grad'; 50:53= 'some college'; 54='college degree'; 60='more than college'; else=NA")
nhis$pov <- Recode(nhis$pooryn,
                   recodes= "1='at or above'; 2='below'; else=NA")
nhis$smokstat <- Recode(nhis$smokfreqnow,
                        recodes= "1='non-smoker'; 2='current smoker'; 3='evry day smoker'; else=NA")
nhis$marstat1 <- Recode(nhis$marstat,
                        recodes= "10:13='married'; 20='widowed'; 30='divorced'; 40='separated'; 50='never married'; else=NA")

nhis$white <- Recode(nhis$racenew,
                     recodes= "100=1; 997:999=NA; else=0")
nhis$black <- Recode(nhis$racenew,
                      recodes= "200=1; 997:999=NA; else=0")
nhis$othr <- Recode(nhis$racenew,
                    recodes= "300:542=1; 997:999=NA; else=0")
nhis$hisp <- Recode(nhis$hispeth,
                    recodes= "10=0; 20:70=1; else=NA")
nhis$hisp1 <- Recode(nhis$hispeth,
                    recodes= "10='no'; 20:70='yes'; else=NA")
nhis$race_eth[nhis$hisp == 0 & nhis$white==1]<-"NHWhite"

Warning: Unknown or uninitialised column: `race_eth`.

nhis$race_eth[nhis$hisp == 0 & nhis$black==1]<-"NHBlack"
nhis$race_eth[nhis$hisp == 0 & nhis$othr==1]<-"NHOthr"
nhis$race_eth[nhis$hisp == 1]<-"Hispanic"
nhis$race_eth[is.na(nhis$hisp) ==T | is.na(nhis$white)==T | is.na(nhis$black)==T | is.na(nhis$othr) ==T]<-NA

# making covariates for Hispanic subpopulation
nhis_h <- nhis %>% 
  filter(hisp1=="yes")

nhis_h$sex2 <- Recode(nhis_h$sex,
                    recodes= "1=1; 2=0; else=NA")
nhis_h$pov1 <- Recode(nhis_h$pooryn,
                   recodes= "1=0; 2=1; else=NA")
nhis_h$smk <- Recode(nhis_h$smokfreqnow,
                     recodes= "recodes= 1=0; 2:3=1; else=NA")
nhis_h$cit <- Recode(nhis_h$citizen,
                      recodes= "1=0; 2=1; else=NA")
nhis_h$obese_yn <- Recode(nhis_h$bmicat,
                          recodes= "1:3=0; 4=1; else=NA")

nhis_h <- nhis_h %>%
  mutate(death_age = ifelse( mortstat ==1, 
                             mortdody - (year - age), 
                             2018 - (year - age)), 
         d.event = ifelse(mortstat == 1, 1, 0))

A. Cox regression models

1) Carry out the following analysis:

Define your outcome as in HW 1. Also consider what covariates are hypothesized to affect the outcome variable. Define these and construct a parametric model for your outcome.

The outcome of interest is mortality risk associated with obesity by race/ethnicity with a specific focus on the Latino population.

There are a few independent variables of interest. 1) Obesity status is the main consideration since this state is associated with poorer health outcomes. 2) Race/Ethnicity will be used to examine differences in mortality risk by subgroup. 3) Length of stay or number of years spent in the U.S will be used to compare the mortality risk between Latinos that have spent less time in the U.S. versus those who have lived in the U.S. for an extended period of time.

Relevant covariates:

Age
Sex
Education
Marital Status
Poverty
Self reported health
Health behaviors (smoking and alcohol consumption)

Fit the Cox model to the data.

library(survival)
library(eha)

nhis1 <- nhis %>%
  filter(complete.cases(.)) %>% 
  mutate(death_age = ifelse( mortstat ==1, 
                             mortdody - (year - age), 
                             2018 - (year - age)), 
         d.event = ifelse(mortstat == 1, 1, 0))

age_fit <- survfit(Surv(death_age, d.event) ~ 1, 
                   data = nhis1)

library(ggsurvfit)

age_fit %>%
  ggsurvfit() +
  add_confidence_interval(type = "ribbon") +
  add_quantile()

Baseline Hazard Model

After fitting the Cox model, the results show that there were 1,469 events (deaths) total. Compared to those who are not obese, those that are had a 24% increased risk of experiencing the event.

library(survey)

Loading required package: grid

Loading required package: Matrix


Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack


Attaching package: 'survey'

The following object is masked from 'package:graphics':

    dotchart

des<-svydesign(ids=~psu, strata=~strata,
               data=nhis1,
               weight=~mortwtsa,
               nest=TRUE)

#baseline cox model
fit.cox <- svycoxph(Surv(death_age, d.event)~obese, design=des )

summary(fit.cox)

Stratified 1 - level Cluster Sampling design (with replacement)
With (657) clusters.
svydesign(ids = ~psu, strata = ~strata, data = nhis1, weight = ~mortwtsa, 
    nest = TRUE)

Call:
svycoxph(formula = Surv(death_age, d.event) ~ obese, design = des)

  n= 31306, number of events= 1469 

            coef exp(coef) se(coef) robust se     z Pr(>|z|)   
obeseyes 0.21551   1.24049  0.06465   0.07436 2.898  0.00375 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

         exp(coef) exp(-coef) lower .95 upper .95
obeseyes      1.24     0.8061     1.072     1.435

Concordance= 0.53  (se = 0.013 )
Likelihood ratio test= NA  on 1 df,   p=NA
Wald test            = 8.4  on 1 df,   p=0.004
Score (logrank) test = NA  on 1 df,   p=NA

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).

Hazard model with additional covariates

In addition to obesity status, this Cox model included race/ethnicity, sex, and educational attainment. The effect of obesity remained significant and the amount of increased risk remained about the same at 20%. Compared to Hispanics, non-Hispanic Blacks experience a 77% increased risk of experiencing the event, whereas for non-Hispanic Whites the risk is similar to Hispanics. The risk for college graduates and those with education past college is about the same. Compared to college graduates, those with lower levels of educational attainment have a greater risk of experiencing the event.

cox1 <- svycoxph(Surv(death_age, d.event)~obese+race_eth+sex1+education, design=des )

summary(cox1)

Stratified 1 - level Cluster Sampling design (with replacement)
With (657) clusters.
svydesign(ids = ~psu, strata = ~strata, data = nhis1, weight = ~mortwtsa, 
    nest = TRUE)

Call:
svycoxph(formula = Surv(death_age, d.event) ~ obese + race_eth + 
    sex1 + education, design = des)

  n= 31306, number of events= 1469 

                              coef exp(coef) se(coef) robust se     z Pr(>|z|)
obeseyes                   0.18437   1.20246  0.06509   0.07558 2.439  0.01471
race_ethNHBlack            0.57158   1.77106  0.14125   0.18251 3.132  0.00174
race_ethNHOthr             0.01996   1.02016  0.19341   0.24364 0.082  0.93471
race_ethNHWhite            0.08463   1.08832  0.12138   0.16699 0.507  0.61228
sex1male                   0.35645   1.42825  0.06123   0.06632 5.375 7.66e-08
educationhs grad           0.72379   2.06223  0.10991   0.11049 6.551 5.72e-11
educationless than hs      0.53316   1.70432  0.12508   0.12680 4.205 2.61e-05
educationmore than college 0.06646   1.06871  0.15224   0.15552 0.427  0.66915
educationsome college      0.53906   1.71439  0.11470   0.11817 4.562 5.07e-06
                              
obeseyes                   *  
race_ethNHBlack            ** 
race_ethNHOthr                
race_ethNHWhite               
sex1male                   ***
educationhs grad           ***
educationless than hs      ***
educationmore than college    
educationsome college      ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

                           exp(coef) exp(-coef) lower .95 upper .95
obeseyes                       1.202     0.8316    1.0369     1.394
race_ethNHBlack                1.771     0.5646    1.2384     2.533
race_ethNHOthr                 1.020     0.9802    0.6328     1.645
race_ethNHWhite                1.088     0.9188    0.7845     1.510
sex1male                       1.428     0.7002    1.2542     1.626
educationhs grad               2.062     0.4849    1.6607     2.561
educationless than hs          1.704     0.5867    1.3293     2.185
educationmore than college     1.069     0.9357    0.7879     1.450
educationsome college          1.714     0.5833    1.3600     2.161

Concordance= 0.626  (se = 0.014 )
Likelihood ratio test= NA  on 9 df,   p=NA
Wald test            = 121.5  on 9 df,   p=<2e-16
Score (logrank) test = NA  on 9 df,   p=NA

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).

Survival plot

##survivalship
plot(survfit(cox1),
     main="mortality survival cox regression")

To test the proportionality assumption of the model Schoenfeld residuals will be extracted and tested. The formal test from Grambsch and Therneau suggest that the proportionality assumption is met.

#Schoenfeld test
fit.test<-cox.zph(cox1)
fit.test

             chisq df    p
obese     4.24e-05  1 0.99
race_eth  4.09e-04  3 1.00
sex1      3.29e-06  1 1.00
education 5.44e-04  4 1.00
GLOBAL    9.68e-04  9 1.00

Plots of schoenfeld residuals also show no significant relationship, which is good news!

plot(fit.test, df=4)

Martingale residuals are examined to assess the functional form of a covariate.

mart<- resid (cox1, type = "martingale")

plot(mart)