#Load required libraries
library(foreign)
library(survival)
library(car)
library(survey)
library(eha)
library(tidyverse)
options(survey.lonely.psu = "adjust")

Read wave 3, wave 4 and weight

w3 <- as_tibble(read.delim("21600-0008-Data.tsv", sep = '\t', header = TRUE ))
w4 <- as_tibble(read.delim("21600-0022-Data.tsv", sep = '\t', header = TRUE ))
w5 <- as_tibble(read.delim("21600-0032-Data.tsv", sep = '\t', header = TRUE ))
wt <- as_tibble(read.delim("21600-0004-Data.tsv", sep = '\t', header = TRUE ))


names(w3)<-tolower(names(w3))
names(w4)<-tolower(names(w4))
names(w5)<-tolower(names(w5))
names(wt)<-tolower(names(wt))

Data Cleaning

# Year of survey for wave 3
#summary(w3$iyear3)

#Age at birth
#summary(w3$h3od1y)

# Current age is year of survey minus age at birth
w3$age <- (w3$iyear3 -w3$h3od1y)
#summary(w3$age)


# Year of survey for wave 4
#summary(w4$iyear4)

#Age at birth
#summary(w4$h4od1y)

# Current age is year of survey minus age at birth
w4$age <- (w4$iyear4 -w4$h4od1y)
#summary(w4$age)


# Year of survey for wave 5
#summary(w5$iyear5)

#Age at birth
#summary(w5$h5od1y)

# Current age is year of survey minus age at birth
w5$age <- (w5$iyear5 -w5$h5od1y)
#summary(w5$age)



# Filter respondent to only 18-24

w3 <- w3%>%
  filter(age >=18 & age <=24)

# Filter respondent to those not serving in he military
#table(w3$h3lm39)

w3 <- w3%>%
  filter(h3lm39 ==0)

# Opportunity Youth Status

#School status- Are you currently enrolled in school or in a job training or vocational education program?
#table(w3$h3da36)

# currently in school

w3$schoolstatus<- car::Recode(w3$h3da36,
                       recodes="1=1; 0=0; else=NA")

#Employment status
#table(w3$h3da28)

#Employment status recode

w3$empstat<- car::Recode(w3$h3da28,
                       recodes="1=1; 0=0; else=NA")

# merging schooling and working

w3$opport<- paste( w3$schoolstatus , w3$empstat, sep ="")
#table(w3$opport)


# Opportunity Youth status
w3$opportunity_youth_three<- car::Recode(w3$opport,
                       recodes="'00'='Opportunity Youth'; '01'='Schooling/Employed'; '10'='Schooling/Employed'; '11'='Schooling & Employed'; else=NA",
                       as.factor=T)

w3$opportunity_youth_two<- car::Recode(w3$opport,
                       recodes="'00'='Opportunity Youth'; '01'='Not Opport Youth'; '10'='Not Opport Youth'; '11'='Not Opport Youth'; else=NA",
                       as.factor=T)


# Insurance at Wave 3

#table(w3$h3hs5)
w3$insurance<- car::Recode(w3$h3hs5,
                       recodes="0='Not Covered'; 1:9='Covered'; else=NA",
                       as.factor=T)


# Race ethnicity for wave 3
#table(w3$h3od2)
#table(w3$h3ir4)
w3$hisp <- car::Recode(w3$h3od2, recodes = "1= 'hisp' ; 0= 'not_hisp'; else=NA")
w3$race <- car::Recode(w3$h3ir4, recodes = "1= 'wh'; 2= 'bl'; 3 = 'asian'; 4='other'; else=NA")
w3$race_eth<-ifelse(w3$hisp ==1, "hisp", w3$race)
w3$race_eth <- interaction(w3$hisp, w3$race)


  w3$race_ethR<- mutate(w3, ifelse(hisp==0 & race==1, 1,
                    ifelse(hisp==0 & race==2, 2,
                     ifelse(hisp==1, 3,
                    ifelse(hisp==0 & race==3, 4,
                     ifelse(hisp==0 & race==4, 5, "NA"))))))
w3$race_ethR<- ifelse(substr(w3$race_eth, 1, 4)=="hisp", "hisp", as.character(w3$race_eth))


w3$race_ethR <- car::Recode(w3$race_ethR, recodes = "'not_hisp.asian'= 'not_hisp.other'")

#w3$race_ethR <- car::Recode(w3$race_ethR, recodes = "'hisp'= 'minority'; 'not_hisp.bl'='minority'; 'not_hisp.other'= 'minority'; 'not_hisp.wh' = 'majority'")



# Event of interest 

# SRH- Health status - WAVE 3
#table(w3$h3gh1)
w3$badhealth <-car::Recode(w3$h3gh1, recodes="4:5 =1 ; 1:3=0 ; else=NA")

# SRH- Health status - WAVE 4
#table(w4$h4gh1)
w4$badhealth <-car::Recode(w4$h4gh1, recodes="4:5 =1 ; 1:3=0 ; else=NA")


# SRH- Health status - WAVE 5
#table(w5$h5id1)
w5$badhealth <-car::Recode(w5$h5id1, recodes="4:5 =1 ; 1:3=0 ; else=NA")

# Combine all ways
allwaves <- left_join(w3, w4, by="aid")
allwaves <- left_join(allwaves, w5, by="aid")
allwaves<- left_join(allwaves, wt, by="aid")

gc()

##            used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  2257287 120.6    4527038 241.8         NA  3603786 192.5
## Vcells 18280891 139.5   33547340 256.0      16384 27889391 212.8

#get out only the variables for use 

myvars<-c( "aid","badhealth.x", "badhealth.y", "badhealth", "opportunity_youth_three","opportunity_youth_two",
           "age.x", "age.y", "bio_sex3", "age", "insurance", "gswgt1", "cluster2",
           "race_ethR")
allwaves2<-allwaves[,myvars]

# time varying variables- Health and age
allwaves2$age_1<- allwaves2$age.x
allwaves2$age_2<- allwaves2$age.y
allwaves2$age_3<- allwaves2$age



allwaves2$badhealth_1<-allwaves2$badhealth.x
allwaves2$badhealth_2<-allwaves2$badhealth.y
allwaves2$badhealth_3<-allwaves2$badhealth

#Time constant variables- Opportunity youth stat, sex, bmi, insurance, race
#Recode race with white, non Hispanic as reference using dummy vars

#Opportunity Youth status
#bmi
#Insurance
#race
allwaves2$sex<- car::Recode(allwaves2$bio_sex3, recodes="1='Male'; 2='Female'")

Remove young adult who already experienced bad health as they are not at risk of experience bad health in wave 4

allwaves2<-allwaves2 %>% filter(is.na(badhealth_1)==F &
                  is.na(badhealth_2)==F &
                  is.na(badhealth_3)==F & 
                  is.na(age_1)==F &
                  is.na(age_2)==F &
                  is.na(age_3)==F &
                    badhealth_1!=1)%>%
  mutate(badhealthtran_1 =ifelse(badhealth_1==0 & badhealth_2==0, 0,1), 
        badhealthtran_2 = ifelse(badhealthtran_1==1, NA,
                         ifelse(badhealth_2==0 & badhealth_3==0,0,1)))

e.long1 <- allwaves2 %>%
  #rename(wt = w4c4p_40,strata= w4c4p_4str, psu = w4c4p_4psu)%>%
  select(opportunity_youth_two,insurance, race_ethR, sex,opportunity_youth_three,aid,gswgt1,cluster2,    #time constant
        age_1, age_2,age_3, #t-varying variables
         badhealth_1, badhealth_2, badhealth_3 )%>%
   pivot_longer(cols = c(-opportunity_youth_two,-insurance, -race_ethR, -sex, -opportunity_youth_three, -aid, -gswgt1, -cluster2), #time constant variables go here
               names_to  = c(".value", "wave"), #make wave variable and put t-v vars into columns
               names_sep = "_") %>% #all t-v variables have _ between name and time, like age_1, age_2
   group_by(aid)%>%
  mutate(age_enter = age, 
         age_exit = lead(age, 1, order_by=aid))%>%
  mutate(nexbadhealth = dplyr::lead(badhealth,n=1, order_by = age))%>%
  mutate(badhealthtran = ifelse(nexbadhealth == 1 & badhealth == 0, 1, 0))%>%
  filter(is.na(age_exit)==F)%>%
  ungroup()%>%
  filter(complete.cases(age_enter, age_exit, badhealthtran,
                        opportunity_youth_two, insurance, race_ethR,sex, gswgt1,cluster2))

person-period data set

  subpp<-survSplit(Surv(time = age_enter, time2=age_exit, event = badhealthtran)~., data=e.long1,
   cut=seq(25, 41, 1),  episode="year_health")

  subpp$year <-   subpp$year_health-1

options(survey.lonely.psu = "adjust")
des<-survey::svydesign(ids=~cluster2,
               #strata=~strata,
               data=subpp,
               weight=~gswgt1 )

#basic log models 
#Linear term for time
fit.0<-svyglm(badhealthtran~as.factor(year)-1,
                 design=des,
                 family=binomial(link="cloglog"))

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

summary(fit.0)

## 
## Call:
## svyglm(formula = badhealthtran ~ as.factor(year) - 1, design = des, 
##     family = binomial(link = "cloglog"))
## 
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## as.factor(year)0   -6.9519     0.9992  -6.957 2.33e-10 ***
## as.factor(year)1   -4.8120     0.3449 -13.952  < 2e-16 ***
## as.factor(year)2   -4.3149     0.2311 -18.674  < 2e-16 ***
## as.factor(year)3   -3.9812     0.1774 -22.440  < 2e-16 ***
## as.factor(year)4   -4.1765     0.2267 -18.422  < 2e-16 ***
## as.factor(year)5   -4.1564     0.1958 -21.227  < 2e-16 ***
## as.factor(year)6   -4.8670     0.2663 -18.277  < 2e-16 ***
## as.factor(year)7   -7.4098     1.0021  -7.394 2.58e-11 ***
## as.factor(year)8   -8.5307     1.0068  -8.473 9.60e-14 ***
## as.factor(year)9   -5.0949     0.3488 -14.605  < 2e-16 ***
## as.factor(year)10  -3.9067     0.2248 -17.377  < 2e-16 ***
## as.factor(year)11  -3.6101     0.1894 -19.062  < 2e-16 ***
## as.factor(year)12  -3.4750     0.1929 -18.015  < 2e-16 ***
## as.factor(year)13  -2.9277     0.1692 -17.306  < 2e-16 ***
## as.factor(year)14  -2.8989     0.2048 -14.154  < 2e-16 ***
## as.factor(year)15  -2.4222     0.3146  -7.699 5.42e-12 ***
## as.factor(year)16  -2.5649     0.7181  -3.572  0.00052 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.000034)
## 
## Number of Fisher Scoring iterations: 10

Plot the hazard function on the probability scale

haz<-1/(1+exp(-coef(fit.0)))
time<-seq(0,16,1)
plot(haz~time, type="l", ylab="h(t)")
title(main="Discrete Time Hazard Function for bad health")

St<-NA
  time<-1:length(haz)
  St[1]<-1-haz[1]
  for(i in 2:length(haz)){
    St[i]<-St[i-1]* (1-haz[i])
  }
  
  St<-c(1, St)
  time<-c(0, time)
  plot(y=St,x=time, type="l",
       main="Survival function for Bad Health")

  1/(1+exp(-coef(fit.0)))

##  as.factor(year)0  as.factor(year)1  as.factor(year)2  as.factor(year)3 
##      0.0009559234      0.0080655985      0.0131914125      0.0183206749 
##  as.factor(year)4  as.factor(year)5  as.factor(year)6  as.factor(year)7 
##      0.0151206735      0.0154228787      0.0076373369      0.0006049248 
##  as.factor(year)8  as.factor(year)9 as.factor(year)10 as.factor(year)11 
##      0.0001972731      0.0060908827      0.0197113767      0.0263364896 
## as.factor(year)12 as.factor(year)13 as.factor(year)14 as.factor(year)15 
##      0.0300329841      0.0508011518      0.0522078814      0.0814959000 
## as.factor(year)16 
##      0.0714313349

fit.1<-svyglm(badhealthtran~as.factor(year)-1+opportunity_youth_two+race_ethR+insurance,
              design=des,
              family=binomial(link="cloglog"))

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

Test for an interaction between at least two of the predictors

fit.2<-svyglm(badhealthtran~as.factor(year)-1+opportunity_youth_two+race_ethR+insurance+opportunity_youth_two*insurance,
              design=des,
              family=binomial(link="cloglog"))

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

summary(fit.2)

## 
## Call:
## svyglm(formula = badhealthtran ~ as.factor(year) - 1 + opportunity_youth_two + 
##     race_ethR + insurance + opportunity_youth_two * insurance, 
##     design = des, family = binomial(link = "cloglog"))
## 
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
## 
## Coefficients:
##                                                             Estimate Std. Error
## as.factor(year)0                                             -6.9464     1.0376
## as.factor(year)1                                             -4.8063     0.3750
## as.factor(year)2                                             -4.3081     0.2784
## as.factor(year)3                                             -3.9745     0.2472
## as.factor(year)4                                             -4.1702     0.2821
## as.factor(year)5                                             -4.1501     0.2598
## as.factor(year)6                                             -4.8614     0.3027
## as.factor(year)7                                             -7.4042     1.0367
## as.factor(year)8                                             -8.5251     1.0184
## as.factor(year)9                                             -5.0897     0.3776
## as.factor(year)10                                            -3.9045     0.2550
## as.factor(year)11                                            -3.6080     0.2382
## as.factor(year)12                                            -3.4794     0.2395
## as.factor(year)13                                            -2.9330     0.2364
## as.factor(year)14                                            -2.9108     0.2577
## as.factor(year)15                                            -2.3849     0.3369
## as.factor(year)16                                            -2.4707     0.6970
## opportunity_youth_twoOpportunity Youth                        0.5803     0.1640
## race_ethRnot_hisp.bl                                         -0.1359     0.1942
## race_ethRnot_hisp.other                                      -0.4672     0.3233
## race_ethRnot_hisp.wh                                         -0.2516     0.1592
## insuranceNot Covered                                          0.5310     0.1382
## opportunity_youth_twoOpportunity Youth:insuranceNot Covered  -0.3590     0.2545
##                                                             t value Pr(>|t|)
## as.factor(year)0                                             -6.695 1.00e-09
## as.factor(year)1                                            -12.818  < 2e-16
## as.factor(year)2                                            -15.475  < 2e-16
## as.factor(year)3                                            -16.076  < 2e-16
## as.factor(year)4                                            -14.782  < 2e-16
## as.factor(year)5                                            -15.977  < 2e-16
## as.factor(year)6                                            -16.059  < 2e-16
## as.factor(year)7                                             -7.142 1.12e-10
## as.factor(year)8                                             -8.371 2.26e-13
## as.factor(year)9                                            -13.480  < 2e-16
## as.factor(year)10                                           -15.312  < 2e-16
## as.factor(year)11                                           -15.148  < 2e-16
## as.factor(year)12                                           -14.529  < 2e-16
## as.factor(year)13                                           -12.405  < 2e-16
## as.factor(year)14                                           -11.293  < 2e-16
## as.factor(year)15                                            -7.079 1.53e-10
## as.factor(year)16                                            -3.545 0.000582
## opportunity_youth_twoOpportunity Youth                        3.538 0.000596
## race_ethRnot_hisp.bl                                         -0.700 0.485652
## race_ethRnot_hisp.other                                      -1.445 0.151363
## race_ethRnot_hisp.wh                                         -1.581 0.116829
## insuranceNot Covered                                          3.842 0.000207
## opportunity_youth_twoOpportunity Youth:insuranceNot Covered  -1.411 0.161208
##                                                                
## as.factor(year)0                                            ***
## as.factor(year)1                                            ***
## as.factor(year)2                                            ***
## as.factor(year)3                                            ***
## as.factor(year)4                                            ***
## as.factor(year)5                                            ***
## as.factor(year)6                                            ***
## as.factor(year)7                                            ***
## as.factor(year)8                                            ***
## as.factor(year)9                                            ***
## as.factor(year)10                                           ***
## as.factor(year)11                                           ***
## as.factor(year)12                                           ***
## as.factor(year)13                                           ***
## as.factor(year)14                                           ***
## as.factor(year)15                                           ***
## as.factor(year)16                                           ***
## opportunity_youth_twoOpportunity Youth                      ***
## race_ethRnot_hisp.bl                                           
## race_ethRnot_hisp.other                                        
## race_ethRnot_hisp.wh                                           
## insuranceNot Covered                                        ***
## opportunity_youth_twoOpportunity Youth:insuranceNot Covered    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.044842)
## 
## Number of Fisher Scoring iterations: 10

Consider both the general model and other time specifications

#Linear term for time
  fit.l<-svyglm(badhealthtran~year,
                design=des ,
                family=binomial(link="cloglog"))

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

  summary(fit.l)

## 
## Call:
## svyglm(formula = badhealthtran ~ year, design = des, family = binomial(link = "cloglog"))
## 
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.22741    0.14849 -35.203  < 2e-16 ***
## year         0.13420    0.01766   7.597 5.44e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.041056)
## 
## Number of Fisher Scoring iterations: 7

fit.s<-svyglm(badhealthtran~year+I(year^2),
                 design=des ,
                 family=binomial(link="cloglog"))

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

 summary(fit.s)

## 
## Call:
## svyglm(formula = badhealthtran ~ year + I(year^2), design = des, 
##     family = binomial(link = "cloglog"))
## 
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.527324   0.144535 -31.323  < 2e-16 ***
## year        -0.144995   0.039261  -3.693 0.000327 ***
## I(year^2)    0.018939   0.002601   7.283 2.96e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.02468)
## 
## Number of Fisher Scoring iterations: 7

 fit.c<-svyglm(badhealthtran~year+I(year^2)+I(year^3 ),
                design=des ,
                family=binomial(link="cloglog"))

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

summary(fit.c)

## 
## Call:
## svyglm(formula = badhealthtran ~ year + I(year^2) + I(year^3), 
##     design = des, family = binomial(link = "cloglog"))
## 
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.9656588  0.2190221 -22.672  < 2e-16 ***
## year         0.2464417  0.1313089   1.877  0.06284 .  
## I(year^2)   -0.0456506  0.0207827  -2.197  0.02987 *  
## I(year^3)    0.0028026  0.0008833   3.173  0.00189 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9563483)
## 
## Number of Fisher Scoring iterations: 7

  fit.q<-svyglm(badhealthtran~year+I(year^2)+I(year^3 )+I(year^4),
                design=des ,
                family=binomial(link="cloglog"))

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

summary(fit.q)

## 
## Call:
## svyglm(formula = badhealthtran ~ year + I(year^2) + I(year^3) + 
##     I(year^4), design = des, family = binomial(link = "cloglog"))
## 
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.600786   0.599695 -11.007  < 2e-16 ***
## year         2.208615   0.447568   4.935 2.49e-06 ***
## I(year^2)   -0.609548   0.104381  -5.840 4.18e-08 ***
## I(year^3)    0.059036   0.009552   6.181 8.16e-09 ***
## I(year^4)   -0.001807   0.000301  -6.004 1.91e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9342506)
## 
## Number of Fisher Scoring iterations: 8

#Spline
  library(splines)
  fit.sp<-svyglm(badhealthtran~ns(year, df = 3),
                 design=des ,
                 family=binomial(link="cloglog"))

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

  summary(fit.sp)

## 
## Call:
## svyglm(formula = badhealthtran ~ ns(year, df = 3), design = des, 
##     family = binomial(link = "cloglog"))
## 
## Survey design:
## survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -5.2520     0.2790 -18.827  < 2e-16 ***
## ns(year, df = 3)1  -0.8796     0.2952  -2.980  0.00346 ** 
## ns(year, df = 3)2   3.4269     0.6450   5.313 4.69e-07 ***
## ns(year, df = 3)3   3.1433     0.1841  17.070  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9278753)
## 
## Number of Fisher Scoring iterations: 7

 dat<-expand.grid(year=seq(1,16,1))
  dat$genmod<-predict(fit.0, newdata=data.frame(year=as.factor(1:16 )), type="response")
  dat$lin<-predict(fit.l, newdata=dat, type="response")
  dat$sq<-predict(fit.s, newdata=dat, type="response")
  dat$cub<-predict(fit.c, newdata=dat, type="response")
  dat$quart<-predict(fit.q, newdata=dat, type="response")
  dat$spline<-predict(fit.sp, newdata=expand.grid(year=seq(1,16,1)), type="response")
dat

##    year       genmod         lin          sq         cub       quart
## 1     1 0.0080982127 0.006119485 0.009484079 0.008511509 0.007097246
## 2     2 0.0132788004 0.006995315 0.008687081 0.009678812 0.015206660
## 3     3 0.0184895183 0.007995989 0.008262678 0.010391804 0.017911905
## 4     4 0.0152355648 0.009139150 0.008161068 0.010712791 0.014833722
## 5     5 0.0155424202 0.010444882 0.008370567 0.010782740 0.010550105
## 6     6 0.0076665755 0.011936041 0.008915367 0.010775445 0.007545520
## 7     7 0.0006051078 0.013638613 0.009860200 0.010871369 0.006091534
## 8     8 0.0001972925 0.015582121 0.011323177 0.011259977 0.005967865
## 9     9 0.0061094697 0.017800072 0.013500301 0.012174375 0.007303020
## 10   10 0.0199069155 0.020330451 0.016708618 0.013971271 0.010997825
## 11   11 0.0266863174 0.023216266 0.021460724 0.017300717 0.019215086
## 12   12 0.0304884511 0.026506138 0.028593329 0.023492903 0.035142431
## 13   13 0.0521130463 0.030254938 0.039489479 0.035520740 0.058230222
## 14   14 0.0535940543 0.034524467 0.056460400 0.060578894 0.073025081
## 15   15 0.0849044270 0.039384170 0.083384463 0.117251248 0.055706605
## 16   16 0.0740418891 0.044911871 0.126698072 0.253494084 0.019495458
##         spline
## 1  0.005223203
## 2  0.006656082
## 3  0.008279898
## 4  0.009817101
## 5  0.010833170
## 6  0.010864519
## 7  0.009825749
## 8  0.008479714
## 9  0.007510468
## 10 0.007343278
## 11 0.008524920
## 12 0.012379091
## 13 0.021815152
## 14 0.044246229
## 15 0.097362702
## 16 0.216144015

plot(genmod~year, dat, type="l", ylab="h(t)", xlab="Time", ylim=c(0, .25), xlim=c(0, 16))
title(main="Hazard function from different time parameterizations")
lines(lin~year, dat, col=2, lwd=2)
lines(sq~year, dat, col=3, lwd=2)
lines(cub~year, dat, col=4, lwd=2)
lines(quart~year, dat, col=5, lwd=2)
lines(spline~year, dat, col=6, lwd=2)
legend("topleft",
       legend=c("General Model", "Linear","Square", "Cubic", "Quartic", "Natural spline"),
       col=1:6, lwd=1.5)

#AIC table
  aic<-round(c(
    fit.l$deviance+2*length(fit.l$coefficients),
    fit.s$deviance+2*length(fit.s$coefficients),
    fit.c$deviance+2*length(fit.c$coefficients),
    fit.q$deviance+2*length(fit.q$coefficients),
    fit.sp$deviance+2*length(fit.sp$coefficients),
    fit.0$deviance+2*length(fit.0$coefficients)),2)
  #compare all aics to the one from the general model
  dif.aic<-round(aic-aic[6],2)
  data.frame(model =c( "linear","square", "cubic", "quartic","spline", "general"),
             aic=aic,
             aic_dif=dif.aic)

##     model     aic aic_dif
## 1  linear 3972.67  192.05
## 2  square 3932.52  151.90
## 3   cubic 3915.92  135.30
## 4 quartic 3828.00   47.38
## 5  spline 3880.80  100.18
## 6 general 3780.62    0.00

Assignment 5

Joseph Jaiyeola

2022-10-27

Read wave 3, wave 4 and weight

Data Cleaning

Remove young adult who already experienced bad health as they are not at risk of experience bad health in wave 4

person-period data set

Plot the hazard function on the probability scale

Test for an interaction between at least two of the predictors

Consider both the general model and other time specifications

According to the Aic, the general model seems to fit best and followed by the quartic model.