library(readxl)
db2017<-read_xlsx("~/Documents/1A_DEM Doctorate/NVSS - CDC birth linked infant mort/Infant Mortality 2017.xlsx")
#summary(db2017)

Part A

Data set and event

We used the publically available, National Vital Statistics System (NVSS) data of all infant deaths in the year 2017 in this analysis (https://www.cdc.gov/nchs/data_access/vitalstatsonline.htm). Infant births and maternal data were linked to the cohort’s individual death certificates. The dataset chosen is comprised of records of infant deaths only. There was a total of 22,280 records availabe in this cohort and the NVSS reports that they matched over 97% of all the births to their death certificates.The event of interest was infant (<1 year of age) that survived beyond the day 3 of birth (i.e >3 days of age censor variable) death. The time variable was measured in days (how many days the infant survived beyond the day of birth, age of death). In this analysis the following predictors will be included: Mother’s age, mother’s BMI, did mother smoke, gestational diabetes/hypertension, eclampsia, birth weight, birth in the hospital. The objective will be to identify the rate of infant death in 3 days time that would benefit from increased monitoring in the first 3 days of birth to potentially reduce risk of potentially, preventable early death.

Part B

Key Variables

  • Event variable: Infant mortality (Death of infant (<1 year old) after they survived the first day of life)
  • Time variable: The variable will be age of death in days; therefore episode is days
  • Censoring indicator: Died on day 3 of birth
  • Predictors: Mother’s age, mother’s nativiy, mother’s race, marital status, did mother smoke, gestational diabetes and hypertension, eclampsia, birth weight, birth in the hospital
#recoding variables
#recodeUS mother variable
library (dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library (car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
db2017$USmother<-Recode(db2017$nativity, recodes="1='US'; 2='other'; else=NA")

#recode mother race
db2017$race<-Recode(db2017$mothersracerecode6, recodes="1='White'; 2='Black'; 3='Other'; 4='Other'; 5='Other'; 6='Other';else=NA")

#recode married mother variable
db2017$married<-Recode(db2017$maritalstatus, recodes="1='Married'; 2='No'; 3='No'; else=NA")

#recode hospital birth place variable
db2017$hospb<-Recode(db2017$birthinhospital, recodes="1=1; 2=2; else=NA")

#recode did mother smoke
db2017$smoke<-Recode(db2017$cigaretterecde, recodes="1=1; 2=2; else=NA")



#censoring
db2017$censorevent7<-ifelse(is.na(db2017$ageofdeath)==T|db2017$ageofdeath>7, 1, 0)
db2017$censorevent3<-ifelse(is.na(db2017$ageofdeath)==T|db2017$ageofdeath>3, 1, 0)
db2017$censorevent1<-ifelse(is.na(db2017$ageofdeath)==T|db2017$ageofdeath>0, 1, 0)
sum(db2017$censorevent) #number of infants survived beyond day of birth
## Warning: Unknown or uninitialised column: 'censorevent'.
## [1] 0
perc<-sum(db2017$censorevent3)/22280
perc #percent of sample that survived beyond day of birth
## [1] 0.5038151
db2017$ceneventfac3<-factor(db2017$censorevent3)
levels(db2017$ceneventfac3)<-c("Mort", "Survived")
summary(db2017$ceneventfac3)
##     Mort Survived 
##    11055    11225

A total of 11225 infants died by day 3 of birth out of 22280 infant deaths in 2017. Therefore about half of infant deaths occur the first 3 days following birth.

Only complete cases

db2017<-db2017%>% 
  filter(complete.cases(censorevent7, censorevent3, censorevent1, ceneventfac3, ageofdeath, mothersage, USmother, race, married, smoke, hospb, gestationaldm, gestationalhypertension, hypertensioneclampsia, birthweightrecode4, recordweight))

db2017$eventfac3<-factor(db2017$censorevent3)
levels(db2017$eventfac3)<-c("Mort", "Survived")
summary(db2017$eventfac3)
##     Mort Survived 
##      979     1749

In the subset with complete cases a total of 979 infants died by day 3 of birth out of 2728 infant deaths in 2017. Therefore 35.9% of infant deaths occur within the first 3 days following birth. There may be reason to believe linked data is biased to births that survived beyond 3 days following birth.

Part C. Carry out the following analysis.

Fit the Cox model

    1. include all main effects in the model
library (survival)
library (eha)
fit.coxph <- coxph(Surv(ageofdeath, censorevent3)~ mothersage + race + married + USmother + gestationaldm + gestationalhypertension + hypertensioneclampsia + birthweightrecode4 + hospb, data=db2017)
summary(fit.coxph)
## Call:
## coxph(formula = Surv(ageofdeath, censorevent3) ~ mothersage + 
##     race + married + USmother + gestationaldm + gestationalhypertension + 
##     hypertensioneclampsia + birthweightrecode4 + hospb, data = db2017)
## 
##   n= 2728, number of events= 1749 
## 
##                              coef exp(coef)  se(coef)      z Pr(>|z|)    
## mothersage               0.003531  1.003538  0.004496  0.785  0.43220    
## raceOther                0.101629  1.106973  0.105321  0.965  0.33457    
## raceWhite                0.181487  1.198999  0.061717  2.941  0.00328 ** 
## marriedNo                0.126220  1.134531  0.059856  2.109  0.03497 *  
## USmotherUS               0.041317  1.042183  0.191566  0.216  0.82924    
## gestationaldm            0.080475  1.083802  0.112483  0.715  0.47434    
## gestationalhypertension -0.011834  0.988236  0.104003 -0.114  0.90941    
## hypertensioneclampsia   -0.748896  0.472888  0.458299 -1.634  0.10224    
## birthweightrecode4      -0.171084  0.842751  0.032654 -5.239 1.61e-07 ***
## hospb                    0.003688  1.003695  0.225736  0.016  0.98696    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                         exp(coef) exp(-coef) lower .95 upper .95
## mothersage                 1.0035     0.9965    0.9947    1.0124
## raceOther                  1.1070     0.9034    0.9005    1.3608
## raceWhite                  1.1990     0.8340    1.0624    1.3532
## marriedNo                  1.1345     0.8814    1.0089    1.2758
## USmotherUS                 1.0422     0.9595    0.7159    1.5171
## gestationaldm              1.0838     0.9227    0.8694    1.3511
## gestationalhypertension    0.9882     1.0119    0.8060    1.2117
## hypertensioneclampsia      0.4729     2.1147    0.1926    1.1611
## birthweightrecode4         0.8428     1.1866    0.7905    0.8985
## hospb                      1.0037     0.9963    0.6448    1.5622
## 
## Concordance= 0.563  (se = 0.008 )
## Likelihood ratio test= 38.26  on 10 df,   p=3e-05
## Wald test            = 38.75  on 10 df,   p=3e-05
## Score (logrank) test = 38.91  on 10 df,   p=3e-05
    1. test for an interaction between at least two of the predictors
fit.coxph.interact <- coxph(Surv(ageofdeath, censorevent3)~ mothersage + married + USmother + gestationaldm + gestationalhypertension + hypertensioneclampsia + hospb +  birthweightrecode4*race, data=db2017)
summary(fit.coxph.interact)
## Call:
## coxph(formula = Surv(ageofdeath, censorevent3) ~ mothersage + 
##     married + USmother + gestationaldm + gestationalhypertension + 
##     hypertensioneclampsia + hospb + birthweightrecode4 * race, 
##     data = db2017)
## 
##   n= 2728, number of events= 1749 
## 
##                                    coef  exp(coef)   se(coef)      z
## mothersage                    0.0034661  1.0034721  0.0044961  0.771
## marriedNo                     0.1255269  1.1337456  0.0600374  2.091
## USmotherUS                    0.0428623  1.0437942  0.1916309  0.224
## gestationaldm                 0.0803931  1.0837130  0.1125502  0.714
## gestationalhypertension      -0.0140258  0.9860721  0.1040635 -0.135
## hypertensioneclampsia        -0.7744941  0.4609369  0.4607393 -1.681
## hospb                         0.0009308  1.0009312  0.2258798  0.004
## birthweightrecode4           -0.2078860  0.8122996  0.0652899 -3.184
## raceOther                     0.1984797  1.2195473  0.3659676  0.542
## raceWhite                     0.0509066  1.0522246  0.1857322  0.274
## birthweightrecode4:raceOther -0.0353955  0.9652236  0.1439637 -0.246
## birthweightrecode4:raceWhite  0.0559720  1.0575681  0.0756777  0.740
##                              Pr(>|z|)   
## mothersage                    0.44075   
## marriedNo                     0.03655 * 
## USmotherUS                    0.82301   
## gestationaldm                 0.47505   
## gestationalhypertension       0.89279   
## hypertensioneclampsia         0.09277 . 
## hospb                         0.99671   
## birthweightrecode4            0.00145 **
## raceOther                     0.58758   
## raceWhite                     0.78402   
## birthweightrecode4:raceOther  0.80579   
## birthweightrecode4:raceWhite  0.45954   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                              exp(coef) exp(-coef) lower .95 upper .95
## mothersage                      1.0035     0.9965    0.9947    1.0124
## marriedNo                       1.1337     0.8820    1.0079    1.2753
## USmotherUS                      1.0438     0.9580    0.7170    1.5196
## gestationaldm                   1.0837     0.9228    0.8692    1.3512
## gestationalhypertension         0.9861     1.0141    0.8041    1.2092
## hypertensioneclampsia           0.4609     2.1695    0.1868    1.1372
## hospb                           1.0009     0.9991    0.6429    1.5584
## birthweightrecode4              0.8123     1.2311    0.7147    0.9232
## raceOther                       1.2195     0.8200    0.5952    2.4987
## raceWhite                       1.0522     0.9504    0.7312    1.5143
## birthweightrecode4:raceOther    0.9652     1.0360    0.7279    1.2799
## birthweightrecode4:raceWhite    1.0576     0.9456    0.9118    1.2267
## 
## Concordance= 0.566  (se = 0.008 )
## Likelihood ratio test= 39.14  on 12 df,   p=1e-04
## Wald test            = 39.15  on 12 df,   p=1e-04
## Score (logrank) test = 39.41  on 12 df,   p=9e-05
    1. perform a likelihood ratio test for two nested models.
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
regTermTest(fit.coxph, ~race, method="LRT")
## Working (Rao-Scott+F) LRT for race
##  in coxph(formula = Surv(ageofdeath, censorevent3) ~ mothersage + 
##     race + married + USmother + gestationaldm + gestationalhypertension + 
##     hypertensioneclampsia + birthweightrecode4 + hospb, data = db2017)
## Working 2logLR =  9.081372 p= 0.010979 
## (scale factors:  1 1 );  denominator df= 2718
regTermTest(fit.coxph, ~birthweightrecode4, method="LRT")
## Working (Rao-Scott+F) LRT for birthweightrecode4
##  in coxph(formula = Surv(ageofdeath, censorevent3) ~ mothersage + 
##     race + married + USmother + gestationaldm + gestationalhypertension + 
##     hypertensioneclampsia + birthweightrecode4 + hospb, data = db2017)
## Working 2logLR =  26.48544 p= 3.1151e-07 
## df=1;  denominator df= 2718
regTermTest(fit.coxph, ~USmother, method="LRT")
## Working (Rao-Scott+F) LRT for USmother
##  in coxph(formula = Surv(ageofdeath, censorevent3) ~ mothersage + 
##     race + married + USmother + gestationaldm + gestationalhypertension + 
##     hypertensioneclampsia + birthweightrecode4 + hospb, data = db2017)
## Working 2logLR =  0.04713079 p= 0.82156 
## df=1;  denominator df= 2718
regTermTest(fit.coxph, ~hospb, method="LRT")
## Working (Rao-Scott+F) LRT for hospb
##  in coxph(formula = Surv(ageofdeath, censorevent3) ~ mothersage + 
##     race + married + USmother + gestationaldm + gestationalhypertension + 
##     hypertensioneclampsia + birthweightrecode4 + hospb, data = db2017)
## Working 2logLR =  0.0002666089 p= 0.98581 
## df=1;  denominator df= 2718
fit.scho<-cox.zph(fit.coxph)
fit.scho
##                              rho    chisq        p
## mothersage              -0.00961  0.16215 6.87e-01
## raceOther                0.01912  0.64152 4.23e-01
## raceWhite               -0.03430  2.05952 1.51e-01
## marriedNo               -0.01503  0.39867 5.28e-01
## USmotherUS              -0.04785  4.00128 4.55e-02
## gestationaldm            0.01345  0.31403 5.75e-01
## gestationalhypertension -0.00159  0.00452 9.46e-01
## hypertensioneclampsia   -0.01188  0.25403 6.14e-01
## birthweightrecode4       0.17990 59.75297 1.08e-14
## hospb                   -0.03915  2.69340 1.01e-01
## GLOBAL                        NA 73.06210 1.13e-11
plot(fit.scho, df=2)

#fit.resmart <- resid(fit.coxph, type="martingale")
#scatter.smooth(db2017$variables$mothersage, fit.resmart, degree = 2,span = 1, ylab="Martingale Residual",col=1,  cex=.25, lpars=list(col = "red", lwd = 5))

We used the schoefeld method and did not find evidence to support non-proportionality other than with birth weight. The latter is to be expected, the longer the infant lives presents the opportunity for greater period of time for growth and weight gain. That or higher weight of infant at birth also predisposes the infant for an opportunity for longer survival.

Part D

Discrete Time Hazard Model

Unable to perform the following method, potentially due to the selected dataset.

#db2017pp <- survSplit(Surv(ageofdeath, censorevent3)~. , data = db2017, cut=c(0,1,3),  episode="deathmonth")
#db2017pp <- db2017pp[order(db2017pp$id, pp$birthmonth),]
#head(db2017pp[, c("id", "ageofdeath", "censorevent3", "birthmonth")], n=10)
#fit.dthaz<-svyglm(ceneventfac3~as.factor(month), design=des,family=binomial)
library (table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
table1(~mothersage + race + married + USmother + as.factor(gestationaldm) + as.factor(gestationalhypertension) + as.factor(hypertensioneclampsia) + as.factor(birthweightrecode4) + as.factor(hospb) | censorevent3, data=db2017)
## Warning in table1.formula(~mothersage + race + married + USmother +
## as.factor(gestationaldm) + : Terms to the right of '|' in formula 'x'
## define table columns and are expected to be factors with meaningful labels.
0
(n=979)
1
(n=1749)
Overall
(n=2728)
mothersage
Mean (SD) 27.8 (5.67) 26.9 (5.44) 27.2 (5.54)
Median [Min, Max] 27.0 [16.0, 50.0] 26.0 [14.0, 46.0] 27.0 [14.0, 50.0]
race
Black 231 (23.6%) 355 (20.3%) 586 (21.5%)
Other 71 (7.3%) 124 (7.1%) 195 (7.1%)
White 677 (69.2%) 1270 (72.6%) 1947 (71.4%)
married
Married 239 (24.4%) 376 (21.5%) 615 (22.5%)
No 740 (75.6%) 1373 (78.5%) 2113 (77.5%)
USmother
other 24 (2.5%) 28 (1.6%) 52 (1.9%)
US 955 (97.5%) 1721 (98.4%) 2676 (98.1%)
as.factor(gestationaldm)
0 952 (97.2%) 1664 (95.1%) 2616 (95.9%)
1 27 (2.8%) 85 (4.9%) 112 (4.1%)
as.factor(gestationalhypertension)
0 957 (97.8%) 1647 (94.2%) 2604 (95.5%)
1 22 (2.2%) 102 (5.8%) 124 (4.5%)
as.factor(hypertensioneclampsia)
0 977 (99.8%) 1744 (99.7%) 2721 (99.7%)
1 2 (0.2%) 5 (0.3%) 7 (0.3%)
as.factor(birthweightrecode4)
1 734 (75.0%) 301 (17.2%) 1035 (37.9%)
2 130 (13.3%) 397 (22.7%) 527 (19.3%)
3 103 (10.5%) 1050 (60.0%) 1153 (42.3%)
4 12 (1.2%) 1 (0.1%) 13 (0.5%)
as.factor(hospb)
1 955 (97.5%) 1729 (98.9%) 2684 (98.4%)
2 24 (2.5%) 20 (1.1%) 44 (1.6%)