Loading required package: carData
Attaching package: 'car'
The following object is masked from 'package:dplyr':
recode
The following object is masked from 'package:purrr':
some
nhis <-read_stata("C:/Users/maman/OneDrive - University of Texas at San Antonio/Event History Analysis Data/nhis_00002.dta.gz")nhis <- haven::zap_labels(nhis)nhis <- nhis %>%filter(mortelig ==1)#Creating covariatesnhis$sex1 <-Recode(nhis$sex,recodes="1='male'; 2='female'; else=NA")nhis$cit <-Recode(nhis$citizen,recodes="1='no'; 2='yes'; else=NA")nhis$obese <-Recode(nhis$bmicat,recodes="1:3='no'; 4='yes'; else=NA")nhis$education <-Recode(nhis$educrec2,recodes="10:41='less than hs'; 42='hs grad'; 50:53= 'some college'; 54='college degree'; 60='more than college'; else=NA")nhis$pov <-Recode(nhis$pooryn,recodes="1='at or above'; 2='below'; else=NA")nhis$smokstat <-Recode(nhis$smokfreqnow,recodes="1='non-smoker'; 2='current smoker'; 3='evry day smoker'; else=NA")nhis$marstat1 <-Recode(nhis$marstat,recodes="10:13='married'; 20='widowed'; 30='divorced'; 40='separated'; 50='never married'; else=NA")nhis$white <-Recode(nhis$racenew,recodes="100=1; 997:999=NA; else=0")nhis$black <-Recode(nhis$racenew,recodes="200=1; 997:999=NA; else=0")nhis$othr <-Recode(nhis$racenew,recodes="300:542=1; 997:999=NA; else=0")nhis$hisp <-Recode(nhis$hispeth,recodes="10=0; 20:70=1; else=NA")nhis$hisp1 <-Recode(nhis$hispeth,recodes="10='no'; 20:70='yes'; else=NA")nhis$race_eth[nhis$hisp ==0& nhis$white==1]<-"NHWhite"
Warning: Unknown or uninitialised column: `race_eth`.
1.Fit the discrete time hazard model to your outcome
a)You must form a person-period data set
b)Consider both the general model and other time specifications
c)Include all main effects in the model
d)Test for an interaction between at least two of the predictors e)Generate hazard plots for interesting cases highlighting the significant predictors in your analysis
library(ggplot2)out%>%dplyr::filter(obese=="yes") %>%dplyr::mutate(mod = dplyr::case_when(.$variable =="h0"~"Constant",.$variable =="h1"~"General",.$variable =="h2"~"Linear",.$variable =="h3"~"Polynomial - 2",.$variable =="h4"~"Spline"))%>%ggplot(aes(x = mort_5_yr*5, y=value ))+geom_line(aes(group=race_eth, color=race_eth) )+labs(title ="Hazard function for adult mortality",subtitle ="Alternative model specifications")+xlab("Age")+ylab("S(t)")+facet_wrap(~mod)#+ scale_y_continuous(trans = "log10")
Don't know how to automatically pick scale for object of type svystat. Defaulting to continuous.
The plotted hazard rates for each model specification suggest that the mortality risk among obese individuals appears to be similar for Hispanics and non-Hispanic Whites, while non-Hispanic Blacks have the highest rate.
Model fit
#constructing a table of relative model fitsAIC0<-AIC(m0)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC1<-AIC(m1)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC2<-AIC(m2)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC3<-AIC(m3)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC4<-AIC(m4)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AICS$deltaAIC<-AICS$AIC - AICS$AIC[AICS$mod=="general"]knitr::kable(AICS[, c("mod", "AIC", "deltaAIC")],caption ="Relative AIC for alternative time specifications")
Relative AIC for alternative time specifications
mod
AIC
deltaAIC
const
15791.71
4544.15216
general
11247.56
0.00000
linear
11315.69
68.13744
poly 2
11284.06
36.50958
spline
11283.56
36.00395
It seems that out of all the alternative time specifications, the spline fits the general model the best.