#Load required libraries
library(foreign)
library(survival)
library(car)
library(survey)
library(eha)
library(tidyverse)
options(survey.lonely.psu = "adjust")
Read wave 3, wave 4 and weight
w3 <- as_tibble(read.delim("21600-0008-Data.tsv", sep = '\t', header = TRUE ))
w4 <- as_tibble(read.delim("21600-0022-Data.tsv", sep = '\t', header = TRUE ))
w5 <- as_tibble(read.delim("21600-0032-Data.tsv", sep = '\t', header = TRUE ))
wt <- as_tibble(read.delim("21600-0004-Data.tsv", sep = '\t', header = TRUE ))
names(w3)<-tolower(names(w3))
names(w4)<-tolower(names(w4))
names(w5)<-tolower(names(w5))
names(wt)<-tolower(names(wt))
Data Cleaning
# Year of survey for wave 3
#summary(w3$iyear3)
#Age at birth
#summary(w3$h3od1y)
# Current age is year of survey minus age at birth
w3$age <- (w3$iyear3 -w3$h3od1y)
#summary(w3$age)
# Year of survey for wave 4
#summary(w4$iyear4)
#Age at birth
#summary(w4$h4od1y)
# Current age is year of survey minus age at birth
w4$age <- (w4$iyear4 -w4$h4od1y)
#summary(w4$age)
# Year of survey for wave 5
#summary(w5$iyear5)
#Age at birth
#summary(w5$h5od1y)
# Current age is year of survey minus age at birth
w5$age <- (w5$iyear5 -w5$h5od1y)
#summary(w5$age)
# Filter respondent to only 18-24
w3 <- w3%>%
filter(age >=18 & age <=24)
# Filter respondent to those not serving in he military
#table(w3$h3lm39)
w3 <- w3%>%
filter(h3lm39 ==0)
# Opportunity Youth Status
#School status- Are you currently enrolled in school or in a job training or vocational education program?
#table(w3$h3da36)
# currently in school
w3$schoolstatus<- car::Recode(w3$h3da36,
recodes="1=1; 0=0; else=NA")
#Employment status
#table(w3$h3da28)
#Employment status recode
w3$empstat<- car::Recode(w3$h3da28,
recodes="1=1; 0=0; else=NA")
# merging schooling and working
w3$opport<- paste( w3$schoolstatus , w3$empstat, sep ="")
#table(w3$opport)
# Opportunity Youth status
w3$opportunity_youth_three<- car::Recode(w3$opport,
recodes="'00'='Opportunity Youth'; '01'='Schooling/Employed'; '10'='Schooling/Employed'; '11'='Schooling & Employed'; else=NA",
as.factor=T)
w3$opportunity_youth_two<- car::Recode(w3$opport,
recodes="'00'='Opportunity Youth'; '01'='Not Opport Youth'; '10'='Not Opport Youth'; '11'='Not Opport Youth'; else=NA",
as.factor=T)
# Insurance at Wave 3
#table(w3$h3hs5)
w3$insurance<- car::Recode(w3$h3hs5,
recodes="0='Not Covered'; 1:9='Covered'; else=NA",
as.factor=T)
# Race ethnicity for wave 3
#table(w3$h3od2)
#table(w3$h3ir4)
w3$hisp <- car::Recode(w3$h3od2, recodes = "1= 'hisp' ; 0= 'not_hisp'; else=NA")
w3$race <- car::Recode(w3$h3ir4, recodes = "1= 'wh'; 2= 'bl'; 3 = 'asian'; 4='other'; else=NA")
w3$race_eth<-ifelse(w3$hisp ==1, "hisp", w3$race)
w3$race_eth <- interaction(w3$hisp, w3$race)
w3$race_ethR<- mutate(w3, ifelse(hisp==0 & race==1, 1,
ifelse(hisp==0 & race==2, 2,
ifelse(hisp==1, 3,
ifelse(hisp==0 & race==3, 4,
ifelse(hisp==0 & race==4, 5, "NA"))))))
w3$race_ethR<- ifelse(substr(w3$race_eth, 1, 4)=="hisp", "hisp", as.character(w3$race_eth))
w3$race_ethR <- car::Recode(w3$race_ethR, recodes = "'not_hisp.asian'= 'not_hisp.other'")
#w3$race_ethR <- car::Recode(w3$race_ethR, recodes = "'hisp'= 'minority'; 'not_hisp.bl'='minority'; 'not_hisp.other'= 'minority'; 'not_hisp.wh' = 'majority'")
# Event of interest
# SRH- Health status - WAVE 3
#table(w3$h3gh1)
w3$badhealth <-car::Recode(w3$h3gh1, recodes="4:5 =1 ; 1:3=0 ; else=NA")
# SRH- Health status - WAVE 4
#table(w4$h4gh1)
w4$badhealth <-car::Recode(w4$h4gh1, recodes="4:5 =1 ; 1:3=0 ; else=NA")
# SRH- Health status - WAVE 5
#table(w5$h5id1)
w5$badhealth <-car::Recode(w5$h5id1, recodes="4:5 =1 ; 1:3=0 ; else=NA")
# Combine all ways
allwaves <- left_join(w3, w4, by="aid")
allwaves <- left_join(allwaves, w5, by="aid")
allwaves<- left_join(allwaves, wt, by="aid")
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 2257287 120.6 4527038 241.8 NA 3603786 192.5
## Vcells 18280891 139.5 33547340 256.0 16384 27889391 212.8
#get out only the variables for use
myvars<-c( "aid","badhealth.x", "badhealth.y", "badhealth", "opportunity_youth_three","opportunity_youth_two",
"age.x", "age.y", "bio_sex3", "age", "insurance", "gswgt1", "cluster2",
"race_ethR")
allwaves2<-allwaves[,myvars]
# time varying variables- Health and age
allwaves2$age_1<- allwaves2$age.x
allwaves2$age_2<- allwaves2$age.y
allwaves2$age_3<- allwaves2$age
allwaves2$badhealth_1<-allwaves2$badhealth.x
allwaves2$badhealth_2<-allwaves2$badhealth.y
allwaves2$badhealth_3<-allwaves2$badhealth
#Time constant variables- Opportunity youth stat, sex, bmi, insurance, race
#Recode race with white, non Hispanic as reference using dummy vars
#Opportunity Youth status
#bmi
#Insurance
#race
allwaves2$sex<- car::Recode(allwaves2$bio_sex3, recodes="1='Male'; 2='Female'")
Remove young adult who already experienced bad health as they are
not at risk of experience bad health in wave 4
allwaves2<-allwaves2 %>% filter(is.na(badhealth_1)==F &
is.na(badhealth_2)==F &
is.na(badhealth_3)==F &
is.na(age_1)==F &
is.na(age_2)==F &
is.na(age_3)==F &
badhealth_1!=1)%>%
mutate(badhealthtran_1 =ifelse(badhealth_1==0 & badhealth_2==0, 0,1),
badhealthtran_2 = ifelse(badhealthtran_1==1, NA,
ifelse(badhealth_2==0 & badhealth_3==0,0,1)))
e.long1 <- allwaves2 %>%
#rename(wt = w4c4p_40,strata= w4c4p_4str, psu = w4c4p_4psu)%>%
select(opportunity_youth_two,insurance, race_ethR, sex,opportunity_youth_three,aid,gswgt1,cluster2, #time constant
age_1, age_2,age_3, #t-varying variables
badhealth_1, badhealth_2, badhealth_3 )%>%
pivot_longer(cols = c(-opportunity_youth_two,-insurance, -race_ethR, -sex, -opportunity_youth_three, -aid, -gswgt1, -cluster2), #time constant variables go here
names_to = c(".value", "wave"), #make wave variable and put t-v vars into columns
names_sep = "_") %>% #all t-v variables have _ between name and time, like age_1, age_2
group_by(aid)%>%
mutate(age_enter = age,
age_exit = lead(age, 1, order_by=aid))%>%
mutate(nexbadhealth = dplyr::lead(badhealth,n=1, order_by = age))%>%
mutate(badhealthtran = ifelse(nexbadhealth == 1 & badhealth == 0, 1, 0))%>%
filter(is.na(age_exit)==F)%>%
ungroup()%>%
filter(complete.cases(age_enter, age_exit, badhealthtran,
opportunity_youth_two, insurance, race_ethR,sex, gswgt1,cluster2))
person-period data set
subpp<-survSplit(Surv(time = age_enter, time2=age_exit, event = badhealthtran)~., data=e.long1,
cut=seq(25, 41, 1), episode="year_health")
subpp$year <- subpp$year_health-1
options(survey.lonely.psu = "adjust")
des<-survey::svydesign(ids=~cluster2,
#strata=~strata,
data=subpp,
weight=~gswgt1 )
#basic log models
#Linear term for time
fit.0<-svyglm(badhealthtran~as.factor(year)-1,
design=des,
family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.0)
##
## Call:
## svyglm(formula = badhealthtran ~ as.factor(year) - 1, design = des,
## family = binomial(link = "cloglog"))
##
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## as.factor(year)0 -6.9519 0.9992 -6.957 2.33e-10 ***
## as.factor(year)1 -4.8120 0.3449 -13.952 < 2e-16 ***
## as.factor(year)2 -4.3149 0.2311 -18.674 < 2e-16 ***
## as.factor(year)3 -3.9812 0.1774 -22.440 < 2e-16 ***
## as.factor(year)4 -4.1765 0.2267 -18.422 < 2e-16 ***
## as.factor(year)5 -4.1564 0.1958 -21.227 < 2e-16 ***
## as.factor(year)6 -4.8670 0.2663 -18.277 < 2e-16 ***
## as.factor(year)7 -7.4098 1.0021 -7.394 2.58e-11 ***
## as.factor(year)8 -8.5307 1.0068 -8.473 9.60e-14 ***
## as.factor(year)9 -5.0949 0.3488 -14.605 < 2e-16 ***
## as.factor(year)10 -3.9067 0.2248 -17.377 < 2e-16 ***
## as.factor(year)11 -3.6101 0.1894 -19.062 < 2e-16 ***
## as.factor(year)12 -3.4750 0.1929 -18.015 < 2e-16 ***
## as.factor(year)13 -2.9277 0.1692 -17.306 < 2e-16 ***
## as.factor(year)14 -2.8989 0.2048 -14.154 < 2e-16 ***
## as.factor(year)15 -2.4222 0.3146 -7.699 5.42e-12 ***
## as.factor(year)16 -2.5649 0.7181 -3.572 0.00052 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.000034)
##
## Number of Fisher Scoring iterations: 10
Plot the hazard function on the probability scale
haz<-1/(1+exp(-coef(fit.0)))
time<-seq(0,16,1)
plot(haz~time, type="l", ylab="h(t)")
title(main="Discrete Time Hazard Function for bad health")

St<-NA
time<-1:length(haz)
St[1]<-1-haz[1]
for(i in 2:length(haz)){
St[i]<-St[i-1]* (1-haz[i])
}
St<-c(1, St)
time<-c(0, time)
plot(y=St,x=time, type="l",
main="Survival function for Bad Health")

1/(1+exp(-coef(fit.0)))
## as.factor(year)0 as.factor(year)1 as.factor(year)2 as.factor(year)3
## 0.0009559234 0.0080655985 0.0131914125 0.0183206749
## as.factor(year)4 as.factor(year)5 as.factor(year)6 as.factor(year)7
## 0.0151206735 0.0154228787 0.0076373369 0.0006049248
## as.factor(year)8 as.factor(year)9 as.factor(year)10 as.factor(year)11
## 0.0001972731 0.0060908827 0.0197113767 0.0263364896
## as.factor(year)12 as.factor(year)13 as.factor(year)14 as.factor(year)15
## 0.0300329841 0.0508011518 0.0522078814 0.0814959000
## as.factor(year)16
## 0.0714313349
fit.1<-svyglm(badhealthtran~as.factor(year)-1+opportunity_youth_two+race_ethR+insurance,
design=des,
family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
Test for an interaction between at least two of the predictors
fit.2<-svyglm(badhealthtran~as.factor(year)-1+opportunity_youth_two+race_ethR+insurance+opportunity_youth_two*insurance,
design=des,
family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.2)
##
## Call:
## svyglm(formula = badhealthtran ~ as.factor(year) - 1 + opportunity_youth_two +
## race_ethR + insurance + opportunity_youth_two * insurance,
## design = des, family = binomial(link = "cloglog"))
##
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
##
## Coefficients:
## Estimate Std. Error
## as.factor(year)0 -6.9464 1.0376
## as.factor(year)1 -4.8063 0.3750
## as.factor(year)2 -4.3081 0.2784
## as.factor(year)3 -3.9745 0.2472
## as.factor(year)4 -4.1702 0.2821
## as.factor(year)5 -4.1501 0.2598
## as.factor(year)6 -4.8614 0.3027
## as.factor(year)7 -7.4042 1.0367
## as.factor(year)8 -8.5251 1.0184
## as.factor(year)9 -5.0897 0.3776
## as.factor(year)10 -3.9045 0.2550
## as.factor(year)11 -3.6080 0.2382
## as.factor(year)12 -3.4794 0.2395
## as.factor(year)13 -2.9330 0.2364
## as.factor(year)14 -2.9108 0.2577
## as.factor(year)15 -2.3849 0.3369
## as.factor(year)16 -2.4707 0.6970
## opportunity_youth_twoOpportunity Youth 0.5803 0.1640
## race_ethRnot_hisp.bl -0.1359 0.1942
## race_ethRnot_hisp.other -0.4672 0.3233
## race_ethRnot_hisp.wh -0.2516 0.1592
## insuranceNot Covered 0.5310 0.1382
## opportunity_youth_twoOpportunity Youth:insuranceNot Covered -0.3590 0.2545
## t value Pr(>|t|)
## as.factor(year)0 -6.695 1.00e-09
## as.factor(year)1 -12.818 < 2e-16
## as.factor(year)2 -15.475 < 2e-16
## as.factor(year)3 -16.076 < 2e-16
## as.factor(year)4 -14.782 < 2e-16
## as.factor(year)5 -15.977 < 2e-16
## as.factor(year)6 -16.059 < 2e-16
## as.factor(year)7 -7.142 1.12e-10
## as.factor(year)8 -8.371 2.26e-13
## as.factor(year)9 -13.480 < 2e-16
## as.factor(year)10 -15.312 < 2e-16
## as.factor(year)11 -15.148 < 2e-16
## as.factor(year)12 -14.529 < 2e-16
## as.factor(year)13 -12.405 < 2e-16
## as.factor(year)14 -11.293 < 2e-16
## as.factor(year)15 -7.079 1.53e-10
## as.factor(year)16 -3.545 0.000582
## opportunity_youth_twoOpportunity Youth 3.538 0.000596
## race_ethRnot_hisp.bl -0.700 0.485652
## race_ethRnot_hisp.other -1.445 0.151363
## race_ethRnot_hisp.wh -1.581 0.116829
## insuranceNot Covered 3.842 0.000207
## opportunity_youth_twoOpportunity Youth:insuranceNot Covered -1.411 0.161208
##
## as.factor(year)0 ***
## as.factor(year)1 ***
## as.factor(year)2 ***
## as.factor(year)3 ***
## as.factor(year)4 ***
## as.factor(year)5 ***
## as.factor(year)6 ***
## as.factor(year)7 ***
## as.factor(year)8 ***
## as.factor(year)9 ***
## as.factor(year)10 ***
## as.factor(year)11 ***
## as.factor(year)12 ***
## as.factor(year)13 ***
## as.factor(year)14 ***
## as.factor(year)15 ***
## as.factor(year)16 ***
## opportunity_youth_twoOpportunity Youth ***
## race_ethRnot_hisp.bl
## race_ethRnot_hisp.other
## race_ethRnot_hisp.wh
## insuranceNot Covered ***
## opportunity_youth_twoOpportunity Youth:insuranceNot Covered
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.044842)
##
## Number of Fisher Scoring iterations: 10
Consider both the general model and other time specifications
#Linear term for time
fit.l<-svyglm(badhealthtran~year,
design=des ,
family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.l)
##
## Call:
## svyglm(formula = badhealthtran ~ year, design = des, family = binomial(link = "cloglog"))
##
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.22741 0.14849 -35.203 < 2e-16 ***
## year 0.13420 0.01766 7.597 5.44e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.041056)
##
## Number of Fisher Scoring iterations: 7
fit.s<-svyglm(badhealthtran~year+I(year^2),
design=des ,
family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.s)
##
## Call:
## svyglm(formula = badhealthtran ~ year + I(year^2), design = des,
## family = binomial(link = "cloglog"))
##
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.527324 0.144535 -31.323 < 2e-16 ***
## year -0.144995 0.039261 -3.693 0.000327 ***
## I(year^2) 0.018939 0.002601 7.283 2.96e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.02468)
##
## Number of Fisher Scoring iterations: 7
fit.c<-svyglm(badhealthtran~year+I(year^2)+I(year^3 ),
design=des ,
family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.c)
##
## Call:
## svyglm(formula = badhealthtran ~ year + I(year^2) + I(year^3),
## design = des, family = binomial(link = "cloglog"))
##
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.9656588 0.2190221 -22.672 < 2e-16 ***
## year 0.2464417 0.1313089 1.877 0.06284 .
## I(year^2) -0.0456506 0.0207827 -2.197 0.02987 *
## I(year^3) 0.0028026 0.0008833 3.173 0.00189 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9563483)
##
## Number of Fisher Scoring iterations: 7
fit.q<-svyglm(badhealthtran~year+I(year^2)+I(year^3 )+I(year^4),
design=des ,
family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.q)
##
## Call:
## svyglm(formula = badhealthtran ~ year + I(year^2) + I(year^3) +
## I(year^4), design = des, family = binomial(link = "cloglog"))
##
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.600786 0.599695 -11.007 < 2e-16 ***
## year 2.208615 0.447568 4.935 2.49e-06 ***
## I(year^2) -0.609548 0.104381 -5.840 4.18e-08 ***
## I(year^3) 0.059036 0.009552 6.181 8.16e-09 ***
## I(year^4) -0.001807 0.000301 -6.004 1.91e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9342506)
##
## Number of Fisher Scoring iterations: 8
#Spline
library(splines)
fit.sp<-svyglm(badhealthtran~ns(year, df = 3),
design=des ,
family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.sp)
##
## Call:
## svyglm(formula = badhealthtran ~ ns(year, df = 3), design = des,
## family = binomial(link = "cloglog"))
##
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.2520 0.2790 -18.827 < 2e-16 ***
## ns(year, df = 3)1 -0.8796 0.2952 -2.980 0.00346 **
## ns(year, df = 3)2 3.4269 0.6450 5.313 4.69e-07 ***
## ns(year, df = 3)3 3.1433 0.1841 17.070 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9278753)
##
## Number of Fisher Scoring iterations: 7
dat<-expand.grid(year=seq(1,16,1))
dat$genmod<-predict(fit.0, newdata=data.frame(year=as.factor(1:16 )), type="response")
dat$lin<-predict(fit.l, newdata=dat, type="response")
dat$sq<-predict(fit.s, newdata=dat, type="response")
dat$cub<-predict(fit.c, newdata=dat, type="response")
dat$quart<-predict(fit.q, newdata=dat, type="response")
dat$spline<-predict(fit.sp, newdata=expand.grid(year=seq(1,16,1)), type="response")
dat
## year genmod lin sq cub quart
## 1 1 0.0080982127 0.006119485 0.009484079 0.008511509 0.007097246
## 2 2 0.0132788004 0.006995315 0.008687081 0.009678812 0.015206660
## 3 3 0.0184895183 0.007995989 0.008262678 0.010391804 0.017911905
## 4 4 0.0152355648 0.009139150 0.008161068 0.010712791 0.014833722
## 5 5 0.0155424202 0.010444882 0.008370567 0.010782740 0.010550105
## 6 6 0.0076665755 0.011936041 0.008915367 0.010775445 0.007545520
## 7 7 0.0006051078 0.013638613 0.009860200 0.010871369 0.006091534
## 8 8 0.0001972925 0.015582121 0.011323177 0.011259977 0.005967865
## 9 9 0.0061094697 0.017800072 0.013500301 0.012174375 0.007303020
## 10 10 0.0199069155 0.020330451 0.016708618 0.013971271 0.010997825
## 11 11 0.0266863174 0.023216266 0.021460724 0.017300717 0.019215086
## 12 12 0.0304884511 0.026506138 0.028593329 0.023492903 0.035142431
## 13 13 0.0521130463 0.030254938 0.039489479 0.035520740 0.058230222
## 14 14 0.0535940543 0.034524467 0.056460400 0.060578894 0.073025081
## 15 15 0.0849044270 0.039384170 0.083384463 0.117251248 0.055706605
## 16 16 0.0740418891 0.044911871 0.126698072 0.253494084 0.019495458
## spline
## 1 0.005223203
## 2 0.006656082
## 3 0.008279898
## 4 0.009817101
## 5 0.010833170
## 6 0.010864519
## 7 0.009825749
## 8 0.008479714
## 9 0.007510468
## 10 0.007343278
## 11 0.008524920
## 12 0.012379091
## 13 0.021815152
## 14 0.044246229
## 15 0.097362702
## 16 0.216144015
plot(genmod~year, dat, type="l", ylab="h(t)", xlab="Time", ylim=c(0, .25), xlim=c(0, 16))
title(main="Hazard function from different time parameterizations")
lines(lin~year, dat, col=2, lwd=2)
lines(sq~year, dat, col=3, lwd=2)
lines(cub~year, dat, col=4, lwd=2)
lines(quart~year, dat, col=5, lwd=2)
lines(spline~year, dat, col=6, lwd=2)
legend("topleft",
legend=c("General Model", "Linear","Square", "Cubic", "Quartic", "Natural spline"),
col=1:6, lwd=1.5)

#AIC table
aic<-round(c(
fit.l$deviance+2*length(fit.l$coefficients),
fit.s$deviance+2*length(fit.s$coefficients),
fit.c$deviance+2*length(fit.c$coefficients),
fit.q$deviance+2*length(fit.q$coefficients),
fit.sp$deviance+2*length(fit.sp$coefficients),
fit.0$deviance+2*length(fit.0$coefficients)),2)
#compare all aics to the one from the general model
dif.aic<-round(aic-aic[6],2)
data.frame(model =c( "linear","square", "cubic", "quartic","spline", "general"),
aic=aic,
aic_dif=dif.aic)
## model aic aic_dif
## 1 linear 3972.67 192.05
## 2 square 3932.52 151.90
## 3 cubic 3915.92 135.30
## 4 quartic 3828.00 47.38
## 5 spline 3880.80 100.18
## 6 general 3780.62 0.00
According to the Aic, the general model seems to fit best and
followed by the quartic model.