#Load required libraries
library(foreign)
library(survival)
library(car)
library(survey)
library(eha)
library(tidyverse)
options(survey.lonely.psu = "adjust")

Read wave 3, wave 4 and weight

w3 <- as_tibble(read.delim("21600-0008-Data.tsv", sep = '\t', header = TRUE ))
w4 <- as_tibble(read.delim("21600-0022-Data.tsv", sep = '\t', header = TRUE ))
w5 <- as_tibble(read.delim("21600-0032-Data.tsv", sep = '\t', header = TRUE ))
wt <- as_tibble(read.delim("21600-0004-Data.tsv", sep = '\t', header = TRUE ))


names(w3)<-tolower(names(w3))
names(w4)<-tolower(names(w4))
names(w5)<-tolower(names(w5))
names(wt)<-tolower(names(wt))

Data Cleaning

# Year of survey for wave 3
#summary(w3$iyear3)

#Age at birth
#summary(w3$h3od1y)

# Current age is year of survey minus age at birth
w3$age <- (w3$iyear3 -w3$h3od1y)
#summary(w3$age)


# Year of survey for wave 4
#summary(w4$iyear4)

#Age at birth
#summary(w4$h4od1y)

# Current age is year of survey minus age at birth
w4$age <- (w4$iyear4 -w4$h4od1y)
#summary(w4$age)


# Year of survey for wave 5
#summary(w5$iyear5)

#Age at birth
#summary(w5$h5od1y)

# Current age is year of survey minus age at birth
w5$age <- (w5$iyear5 -w5$h5od1y)
#summary(w5$age)



# Filter respondent to only 18-24

w3 <- w3%>%
  filter(age >=18 & age <=24)

# Filter respondent to those not serving in he military
#table(w3$h3lm39)

w3 <- w3%>%
  filter(h3lm39 ==0)

# Opportunity Youth Status

#School status- Are you currently enrolled in school or in a job training or vocational education program?
#table(w3$h3da36)

# currently in school

w3$schoolstatus<- car::Recode(w3$h3da36,
                       recodes="1=1; 0=0; else=NA")

#Employment status
#table(w3$h3da28)

#Employment status recode

w3$empstat<- car::Recode(w3$h3da28,
                       recodes="1=1; 0=0; else=NA")

# merging schooling and working

w3$opport<- paste( w3$schoolstatus , w3$empstat, sep ="")
#table(w3$opport)


# Opportunity Youth status
w3$opportunity_youth_three<- car::Recode(w3$opport,
                       recodes="'00'='Opportunity Youth'; '01'='Schooling/Employed'; '10'='Schooling/Employed'; '11'='Schooling & Employed'; else=NA",
                       as.factor=T)

w3$opportunity_youth_two<- car::Recode(w3$opport,
                       recodes="'00'='Opportunity Youth'; '01'='Not Opport Youth'; '10'='Not Opport Youth'; '11'='Not Opport Youth'; else=NA",
                       as.factor=T)


# Insurance at Wave 3

#table(w3$h3hs5)
w3$insurance<- car::Recode(w3$h3hs5,
                       recodes="0='Not Covered'; 1:9='Covered'; else=NA",
                       as.factor=T)


# Race ethnicity for wave 3
#table(w3$h3od2)
#table(w3$h3ir4)
w3$hisp <- car::Recode(w3$h3od2, recodes = "1= 'hisp' ; 0= 'not_hisp'; else=NA")
w3$race <- car::Recode(w3$h3ir4, recodes = "1= 'wh'; 2= 'bl'; 3 = 'asian'; 4='other'; else=NA")
w3$race_eth<-ifelse(w3$hisp ==1, "hisp", w3$race)
w3$race_eth <- interaction(w3$hisp, w3$race)


  w3$race_ethR<- mutate(w3, ifelse(hisp==0 & race==1, 1,
                    ifelse(hisp==0 & race==2, 2,
                     ifelse(hisp==1, 3,
                    ifelse(hisp==0 & race==3, 4,
                     ifelse(hisp==0 & race==4, 5, "NA"))))))
w3$race_ethR<- ifelse(substr(w3$race_eth, 1, 4)=="hisp", "hisp", as.character(w3$race_eth))


w3$race_ethR <- car::Recode(w3$race_ethR, recodes = "'not_hisp.asian'= 'not_hisp.other'")

#w3$race_ethR <- car::Recode(w3$race_ethR, recodes = "'hisp'= 'minority'; 'not_hisp.bl'='minority'; 'not_hisp.other'= 'minority'; 'not_hisp.wh' = 'majority'")



# Event of interest 

# SRH- Health status - WAVE 3
#table(w3$h3gh1)
w3$badhealth <-car::Recode(w3$h3gh1, recodes="4:5 =1 ; 1:3=0 ; else=NA")

# SRH- Health status - WAVE 4
#table(w4$h4gh1)
w4$badhealth <-car::Recode(w4$h4gh1, recodes="4:5 =1 ; 1:3=0 ; else=NA")


# SRH- Health status - WAVE 5
#table(w5$h5id1)
w5$badhealth <-car::Recode(w5$h5id1, recodes="4:5 =1 ; 1:3=0 ; else=NA")
# Combine all ways
allwaves <- left_join(w3, w4, by="aid")
allwaves <- left_join(allwaves, w5, by="aid")
allwaves<- left_join(allwaves, wt, by="aid")
gc()
##            used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  2257287 120.6    4527038 241.8         NA  3603786 192.5
## Vcells 18280891 139.5   33547340 256.0      16384 27889391 212.8
#get out only the variables for use 

myvars<-c( "aid","badhealth.x", "badhealth.y", "badhealth", "opportunity_youth_three","opportunity_youth_two",
           "age.x", "age.y", "bio_sex3", "age", "insurance", "gswgt1", "cluster2",
           "race_ethR")
allwaves2<-allwaves[,myvars]
# time varying variables- Health and age
allwaves2$age_1<- allwaves2$age.x
allwaves2$age_2<- allwaves2$age.y
allwaves2$age_3<- allwaves2$age



allwaves2$badhealth_1<-allwaves2$badhealth.x
allwaves2$badhealth_2<-allwaves2$badhealth.y
allwaves2$badhealth_3<-allwaves2$badhealth

#Time constant variables- Opportunity youth stat, sex, bmi, insurance, race
#Recode race with white, non Hispanic as reference using dummy vars

#Opportunity Youth status
#bmi
#Insurance
#race
allwaves2$sex<- car::Recode(allwaves2$bio_sex3, recodes="1='Male'; 2='Female'")

Remove young adult who already experienced bad health as they are not at risk of experience bad health in wave 4

allwaves2<-allwaves2 %>% filter(is.na(badhealth_1)==F &
                  is.na(badhealth_2)==F &
                  is.na(badhealth_3)==F & 
                  is.na(age_1)==F &
                  is.na(age_2)==F &
                  is.na(age_3)==F &
                    badhealth_1!=1)%>%
  mutate(badhealthtran_1 =ifelse(badhealth_1==0 & badhealth_2==0, 0,1), 
        badhealthtran_2 = ifelse(badhealthtran_1==1, NA,
                         ifelse(badhealth_2==0 & badhealth_3==0,0,1)))
e.long1 <- allwaves2 %>%
  #rename(wt = w4c4p_40,strata= w4c4p_4str, psu = w4c4p_4psu)%>%
  select(opportunity_youth_two,insurance, race_ethR, sex,opportunity_youth_three,aid,gswgt1,cluster2,    #time constant
        age_1, age_2,age_3, #t-varying variables
         badhealth_1, badhealth_2, badhealth_3 )%>%
   pivot_longer(cols = c(-opportunity_youth_two,-insurance, -race_ethR, -sex, -opportunity_youth_three, -aid, -gswgt1, -cluster2), #time constant variables go here
               names_to  = c(".value", "wave"), #make wave variable and put t-v vars into columns
               names_sep = "_") %>% #all t-v variables have _ between name and time, like age_1, age_2
   group_by(aid)%>%
  mutate(age_enter = age, 
         age_exit = lead(age, 1, order_by=aid))%>%
  mutate(nexbadhealth = dplyr::lead(badhealth,n=1, order_by = age))%>%
  mutate(badhealthtran = ifelse(nexbadhealth == 1 & badhealth == 0, 1, 0))%>%
  filter(is.na(age_exit)==F)%>%
  ungroup()%>%
  filter(complete.cases(age_enter, age_exit, badhealthtran,
                        opportunity_youth_two, insurance, race_ethR,sex, gswgt1,cluster2))

person-period data set

  subpp<-survSplit(Surv(time = age_enter, time2=age_exit, event = badhealthtran)~., data=e.long1,
   cut=seq(25, 41, 1),  episode="year_health")

  subpp$year <-   subpp$year_health-1
options(survey.lonely.psu = "adjust")
des<-survey::svydesign(ids=~cluster2,
               #strata=~strata,
               data=subpp,
               weight=~gswgt1 )
#basic log models 
#Linear term for time
fit.0<-svyglm(badhealthtran~as.factor(year)-1,
                 design=des,
                 family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.0)
## 
## Call:
## svyglm(formula = badhealthtran ~ as.factor(year) - 1, design = des, 
##     family = binomial(link = "cloglog"))
## 
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## as.factor(year)0   -6.9519     0.9992  -6.957 2.33e-10 ***
## as.factor(year)1   -4.8120     0.3449 -13.952  < 2e-16 ***
## as.factor(year)2   -4.3149     0.2311 -18.674  < 2e-16 ***
## as.factor(year)3   -3.9812     0.1774 -22.440  < 2e-16 ***
## as.factor(year)4   -4.1765     0.2267 -18.422  < 2e-16 ***
## as.factor(year)5   -4.1564     0.1958 -21.227  < 2e-16 ***
## as.factor(year)6   -4.8670     0.2663 -18.277  < 2e-16 ***
## as.factor(year)7   -7.4098     1.0021  -7.394 2.58e-11 ***
## as.factor(year)8   -8.5307     1.0068  -8.473 9.60e-14 ***
## as.factor(year)9   -5.0949     0.3488 -14.605  < 2e-16 ***
## as.factor(year)10  -3.9067     0.2248 -17.377  < 2e-16 ***
## as.factor(year)11  -3.6101     0.1894 -19.062  < 2e-16 ***
## as.factor(year)12  -3.4750     0.1929 -18.015  < 2e-16 ***
## as.factor(year)13  -2.9277     0.1692 -17.306  < 2e-16 ***
## as.factor(year)14  -2.8989     0.2048 -14.154  < 2e-16 ***
## as.factor(year)15  -2.4222     0.3146  -7.699 5.42e-12 ***
## as.factor(year)16  -2.5649     0.7181  -3.572  0.00052 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.000034)
## 
## Number of Fisher Scoring iterations: 10

Plot the hazard function on the probability scale

haz<-1/(1+exp(-coef(fit.0)))
time<-seq(0,16,1)
plot(haz~time, type="l", ylab="h(t)")
title(main="Discrete Time Hazard Function for bad health")

St<-NA
  time<-1:length(haz)
  St[1]<-1-haz[1]
  for(i in 2:length(haz)){
    St[i]<-St[i-1]* (1-haz[i])
  }
  
  St<-c(1, St)
  time<-c(0, time)
  plot(y=St,x=time, type="l",
       main="Survival function for Bad Health")

  1/(1+exp(-coef(fit.0)))
##  as.factor(year)0  as.factor(year)1  as.factor(year)2  as.factor(year)3 
##      0.0009559234      0.0080655985      0.0131914125      0.0183206749 
##  as.factor(year)4  as.factor(year)5  as.factor(year)6  as.factor(year)7 
##      0.0151206735      0.0154228787      0.0076373369      0.0006049248 
##  as.factor(year)8  as.factor(year)9 as.factor(year)10 as.factor(year)11 
##      0.0001972731      0.0060908827      0.0197113767      0.0263364896 
## as.factor(year)12 as.factor(year)13 as.factor(year)14 as.factor(year)15 
##      0.0300329841      0.0508011518      0.0522078814      0.0814959000 
## as.factor(year)16 
##      0.0714313349
fit.1<-svyglm(badhealthtran~as.factor(year)-1+opportunity_youth_two+race_ethR+insurance,
              design=des,
              family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

Test for an interaction between at least two of the predictors

fit.2<-svyglm(badhealthtran~as.factor(year)-1+opportunity_youth_two+race_ethR+insurance+opportunity_youth_two*insurance,
              design=des,
              family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.2)
## 
## Call:
## svyglm(formula = badhealthtran ~ as.factor(year) - 1 + opportunity_youth_two + 
##     race_ethR + insurance + opportunity_youth_two * insurance, 
##     design = des, family = binomial(link = "cloglog"))
## 
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
## 
## Coefficients:
##                                                             Estimate Std. Error
## as.factor(year)0                                             -6.9464     1.0376
## as.factor(year)1                                             -4.8063     0.3750
## as.factor(year)2                                             -4.3081     0.2784
## as.factor(year)3                                             -3.9745     0.2472
## as.factor(year)4                                             -4.1702     0.2821
## as.factor(year)5                                             -4.1501     0.2598
## as.factor(year)6                                             -4.8614     0.3027
## as.factor(year)7                                             -7.4042     1.0367
## as.factor(year)8                                             -8.5251     1.0184
## as.factor(year)9                                             -5.0897     0.3776
## as.factor(year)10                                            -3.9045     0.2550
## as.factor(year)11                                            -3.6080     0.2382
## as.factor(year)12                                            -3.4794     0.2395
## as.factor(year)13                                            -2.9330     0.2364
## as.factor(year)14                                            -2.9108     0.2577
## as.factor(year)15                                            -2.3849     0.3369
## as.factor(year)16                                            -2.4707     0.6970
## opportunity_youth_twoOpportunity Youth                        0.5803     0.1640
## race_ethRnot_hisp.bl                                         -0.1359     0.1942
## race_ethRnot_hisp.other                                      -0.4672     0.3233
## race_ethRnot_hisp.wh                                         -0.2516     0.1592
## insuranceNot Covered                                          0.5310     0.1382
## opportunity_youth_twoOpportunity Youth:insuranceNot Covered  -0.3590     0.2545
##                                                             t value Pr(>|t|)
## as.factor(year)0                                             -6.695 1.00e-09
## as.factor(year)1                                            -12.818  < 2e-16
## as.factor(year)2                                            -15.475  < 2e-16
## as.factor(year)3                                            -16.076  < 2e-16
## as.factor(year)4                                            -14.782  < 2e-16
## as.factor(year)5                                            -15.977  < 2e-16
## as.factor(year)6                                            -16.059  < 2e-16
## as.factor(year)7                                             -7.142 1.12e-10
## as.factor(year)8                                             -8.371 2.26e-13
## as.factor(year)9                                            -13.480  < 2e-16
## as.factor(year)10                                           -15.312  < 2e-16
## as.factor(year)11                                           -15.148  < 2e-16
## as.factor(year)12                                           -14.529  < 2e-16
## as.factor(year)13                                           -12.405  < 2e-16
## as.factor(year)14                                           -11.293  < 2e-16
## as.factor(year)15                                            -7.079 1.53e-10
## as.factor(year)16                                            -3.545 0.000582
## opportunity_youth_twoOpportunity Youth                        3.538 0.000596
## race_ethRnot_hisp.bl                                         -0.700 0.485652
## race_ethRnot_hisp.other                                      -1.445 0.151363
## race_ethRnot_hisp.wh                                         -1.581 0.116829
## insuranceNot Covered                                          3.842 0.000207
## opportunity_youth_twoOpportunity Youth:insuranceNot Covered  -1.411 0.161208
##                                                                
## as.factor(year)0                                            ***
## as.factor(year)1                                            ***
## as.factor(year)2                                            ***
## as.factor(year)3                                            ***
## as.factor(year)4                                            ***
## as.factor(year)5                                            ***
## as.factor(year)6                                            ***
## as.factor(year)7                                            ***
## as.factor(year)8                                            ***
## as.factor(year)9                                            ***
## as.factor(year)10                                           ***
## as.factor(year)11                                           ***
## as.factor(year)12                                           ***
## as.factor(year)13                                           ***
## as.factor(year)14                                           ***
## as.factor(year)15                                           ***
## as.factor(year)16                                           ***
## opportunity_youth_twoOpportunity Youth                      ***
## race_ethRnot_hisp.bl                                           
## race_ethRnot_hisp.other                                        
## race_ethRnot_hisp.wh                                           
## insuranceNot Covered                                        ***
## opportunity_youth_twoOpportunity Youth:insuranceNot Covered    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.044842)
## 
## Number of Fisher Scoring iterations: 10

Consider both the general model and other time specifications

#Linear term for time
  fit.l<-svyglm(badhealthtran~year,
                design=des ,
                family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
  summary(fit.l)
## 
## Call:
## svyglm(formula = badhealthtran ~ year, design = des, family = binomial(link = "cloglog"))
## 
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.22741    0.14849 -35.203  < 2e-16 ***
## year         0.13420    0.01766   7.597 5.44e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.041056)
## 
## Number of Fisher Scoring iterations: 7
fit.s<-svyglm(badhealthtran~year+I(year^2),
                 design=des ,
                 family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
 summary(fit.s)
## 
## Call:
## svyglm(formula = badhealthtran ~ year + I(year^2), design = des, 
##     family = binomial(link = "cloglog"))
## 
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.527324   0.144535 -31.323  < 2e-16 ***
## year        -0.144995   0.039261  -3.693 0.000327 ***
## I(year^2)    0.018939   0.002601   7.283 2.96e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.02468)
## 
## Number of Fisher Scoring iterations: 7
 fit.c<-svyglm(badhealthtran~year+I(year^2)+I(year^3 ),
                design=des ,
                family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.c)
## 
## Call:
## svyglm(formula = badhealthtran ~ year + I(year^2) + I(year^3), 
##     design = des, family = binomial(link = "cloglog"))
## 
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.9656588  0.2190221 -22.672  < 2e-16 ***
## year         0.2464417  0.1313089   1.877  0.06284 .  
## I(year^2)   -0.0456506  0.0207827  -2.197  0.02987 *  
## I(year^3)    0.0028026  0.0008833   3.173  0.00189 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9563483)
## 
## Number of Fisher Scoring iterations: 7
  fit.q<-svyglm(badhealthtran~year+I(year^2)+I(year^3 )+I(year^4),
                design=des ,
                family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.q)
## 
## Call:
## svyglm(formula = badhealthtran ~ year + I(year^2) + I(year^3) + 
##     I(year^4), design = des, family = binomial(link = "cloglog"))
## 
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.600786   0.599695 -11.007  < 2e-16 ***
## year         2.208615   0.447568   4.935 2.49e-06 ***
## I(year^2)   -0.609548   0.104381  -5.840 4.18e-08 ***
## I(year^3)    0.059036   0.009552   6.181 8.16e-09 ***
## I(year^4)   -0.001807   0.000301  -6.004 1.91e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9342506)
## 
## Number of Fisher Scoring iterations: 8
#Spline
  library(splines)
  fit.sp<-svyglm(badhealthtran~ns(year, df = 3),
                 design=des ,
                 family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
  summary(fit.sp)
## 
## Call:
## svyglm(formula = badhealthtran ~ ns(year, df = 3), design = des, 
##     family = binomial(link = "cloglog"))
## 
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -5.2520     0.2790 -18.827  < 2e-16 ***
## ns(year, df = 3)1  -0.8796     0.2952  -2.980  0.00346 ** 
## ns(year, df = 3)2   3.4269     0.6450   5.313 4.69e-07 ***
## ns(year, df = 3)3   3.1433     0.1841  17.070  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9278753)
## 
## Number of Fisher Scoring iterations: 7
 dat<-expand.grid(year=seq(1,16,1))
  dat$genmod<-predict(fit.0, newdata=data.frame(year=as.factor(1:16 )), type="response")
  dat$lin<-predict(fit.l, newdata=dat, type="response")
  dat$sq<-predict(fit.s, newdata=dat, type="response")
  dat$cub<-predict(fit.c, newdata=dat, type="response")
  dat$quart<-predict(fit.q, newdata=dat, type="response")
  dat$spline<-predict(fit.sp, newdata=expand.grid(year=seq(1,16,1)), type="response")
dat
##    year       genmod         lin          sq         cub       quart
## 1     1 0.0080982127 0.006119485 0.009484079 0.008511509 0.007097246
## 2     2 0.0132788004 0.006995315 0.008687081 0.009678812 0.015206660
## 3     3 0.0184895183 0.007995989 0.008262678 0.010391804 0.017911905
## 4     4 0.0152355648 0.009139150 0.008161068 0.010712791 0.014833722
## 5     5 0.0155424202 0.010444882 0.008370567 0.010782740 0.010550105
## 6     6 0.0076665755 0.011936041 0.008915367 0.010775445 0.007545520
## 7     7 0.0006051078 0.013638613 0.009860200 0.010871369 0.006091534
## 8     8 0.0001972925 0.015582121 0.011323177 0.011259977 0.005967865
## 9     9 0.0061094697 0.017800072 0.013500301 0.012174375 0.007303020
## 10   10 0.0199069155 0.020330451 0.016708618 0.013971271 0.010997825
## 11   11 0.0266863174 0.023216266 0.021460724 0.017300717 0.019215086
## 12   12 0.0304884511 0.026506138 0.028593329 0.023492903 0.035142431
## 13   13 0.0521130463 0.030254938 0.039489479 0.035520740 0.058230222
## 14   14 0.0535940543 0.034524467 0.056460400 0.060578894 0.073025081
## 15   15 0.0849044270 0.039384170 0.083384463 0.117251248 0.055706605
## 16   16 0.0740418891 0.044911871 0.126698072 0.253494084 0.019495458
##         spline
## 1  0.005223203
## 2  0.006656082
## 3  0.008279898
## 4  0.009817101
## 5  0.010833170
## 6  0.010864519
## 7  0.009825749
## 8  0.008479714
## 9  0.007510468
## 10 0.007343278
## 11 0.008524920
## 12 0.012379091
## 13 0.021815152
## 14 0.044246229
## 15 0.097362702
## 16 0.216144015
plot(genmod~year, dat, type="l", ylab="h(t)", xlab="Time", ylim=c(0, .25), xlim=c(0, 16))
title(main="Hazard function from different time parameterizations")
lines(lin~year, dat, col=2, lwd=2)
lines(sq~year, dat, col=3, lwd=2)
lines(cub~year, dat, col=4, lwd=2)
lines(quart~year, dat, col=5, lwd=2)
lines(spline~year, dat, col=6, lwd=2)
legend("topleft",
       legend=c("General Model", "Linear","Square", "Cubic", "Quartic", "Natural spline"),
       col=1:6, lwd=1.5)

#AIC table
  aic<-round(c(
    fit.l$deviance+2*length(fit.l$coefficients),
    fit.s$deviance+2*length(fit.s$coefficients),
    fit.c$deviance+2*length(fit.c$coefficients),
    fit.q$deviance+2*length(fit.q$coefficients),
    fit.sp$deviance+2*length(fit.sp$coefficients),
    fit.0$deviance+2*length(fit.0$coefficients)),2)
  #compare all aics to the one from the general model
  dif.aic<-round(aic-aic[6],2)
  data.frame(model =c( "linear","square", "cubic", "quartic","spline", "general"),
             aic=aic,
             aic_dif=dif.aic)
##     model     aic aic_dif
## 1  linear 3972.67  192.05
## 2  square 3932.52  151.90
## 3   cubic 3915.92  135.30
## 4 quartic 3828.00   47.38
## 5  spline 3880.80  100.18
## 6 general 3780.62    0.00

According to the Aic, the general model seems to fit best and followed by the quartic model.