This document makes an easier access to the supplementary material of the article entitled Relative Index of Inequality and Slope Index of Inequality: A Structured Regression Framework for Estimation Available Here

First get dataRII.txt and dataSII.txt from these links.

You do not need a dropbox account to download theses files !

Please contact either Margarita Moreno (margarita.moreno at mcri.edu.au) or me (aurelien.latouche at cnam.fr).

1) Estimation of RII

Data generated from Cox model with Weibull baseline hazard, including age and the socioeconomic rank x as predictors, with RII=3. Uniform censoring (27%) was superimposed.

dat<-read.table("dataRII.txt",dec=".",sep=";",header=T)

Variables in database:

head(dat)
##   ID     time status socgroup            x midpoint ageEntry
## 1  1  5.91796      1        1 0.0007726050     0.05 61.40943
## 2  2 18.84545      0        1 0.0009642883     0.05 38.57793
## 3  3 15.47689      1        1 0.0018605184     0.05 63.55081
## 4  4 11.60002      1        1 0.0020955342     0.05 55.50917
## 5  5 16.96284      0        1 0.0031381176     0.05 35.67072
## 6  6 17.53922      0        1 0.0055284084     0.05 41.28230
##             ageExit..
## 1 67.3273897429392 \\
## 2 57.4233816505875 \\
## 3 79.0277006776071 \\
## 4 67.1091917095493 \\
## 5 52.6335585908964 \\
## 6 58.8215236843098 \\

Estimate RII from Cox model using age as time-scale : Use coxph function from survival package to fit model. To obtain robust SEs add the term “+cluster(ID)”.

library(survival)
dat$ageExit<-dat$ageEntry+dat$time
fit<-summary(coxph(Surv(time=ageEntry,time2=ageExit,event=status)~midpoint+cluster(ID),data=dat))
logRII<-fit$coefficients[1]
selogrii<-fit$coefficients[4]
library(msm) #for generic delta-method function
serii<-deltamethod(~exp(x1),logRII,selogrii^2)
ResultsHaz<-c(RII=exp(logRII),SE=serii,lower95=fit$conf.int[3],upper95=fit$conf.int[4])

Estimate RII from multiplicative Poisson model

dat$ageGroup2<-tcut(dat$ageEntry, c(30,35,40,45,50,55,60,65,70,75,80,85), labels = 
                  c("30+ - 35","35+ - 40","40+ - 45","45+ - 50","50+ - 55","55+ - 60",
                    "60+ - 65","65+ - 70","70+-75","75+-80","80+-85"))
py<-pyears(Surv(time=time,event=status)~socgroup+ageGroup2, data=dat,data.frame=T,scale=1)
datAGG<-py$data[1:28,]
datAGG$midpoint<-NULL
dismid<-unique(dat$midpoint)
dismid<-dismid[order(dismid)]
for(g in 1:4)datAGG$midpoint[datAGG$socgroup==g]<-dismid[g]  

Use glm function with option “family=quasipoisson(link=”log“)” to estimate overdispersion

fit<-glm(event~offset(log(pyears))+midpoint+ageGroup2,data=datAGG,family=quasipoisson(link="log") )
df.r <- fit$df.residual
disp<-sum((fit$weights * fit$residuals^2)[fit$weights > 0])/df.r
if(disp<1) fit<-glm(event~offset(log(pyears))+midpoint+ageGroup2,data=datAGG,
                    family=poisson(link="log") )

logRII<-summary(fit)$coefficients["midpoint",1]
selogrii<-summary(fit)$coefficients["midpoint",2]
serii<-deltamethod(~exp(x1),logRII,selogrii^2)
ResultsIncid<-c(RII=exp(logRII),SE=serii,
                lower95=exp(logRII-1.96*selogrii),
                upper95=exp(logRII+1.96*selogrii))

# Hazard-based estimates more precise, as expected
rbind(ResultsHaz,ResultsIncid)
##                   RII        SE  lower95  upper95
## ResultsHaz   3.024354 0.3988552 2.335475 3.916428
## ResultsIncid 2.914453 0.4820819 2.107447 4.030487

2) Estimation of SII

Data generated from an additive hazards model with Weibull baseline hazard, including age and the socioeconomic rank x as predictors, with SII=0.05.

Uniform censoring (18%) was superimposed.

dat<-read.table("dataSII.txt",dec=".",sep=";",header=T)

Same Variables in the previous database:

head(dat)
##   ID      time status socgroup            x midpoint ageEntry  ageExit
## 1  1 12.780117      1        1 0.0002800293     0.05 34.88250 47.66262
## 2  2 14.784095      1        1 0.0003628294     0.05 37.00129 51.78538
## 3  3 11.629896      1        1 0.0026183705     0.05 34.13205 45.76195
## 4  4  5.769812      1        1 0.0106292281     0.05 42.06108 47.83089
## 5  5  2.268189      1        1 0.0107400702     0.05 48.54476 50.81295
## 6  6 13.159378      1        1 0.0138846786     0.05 43.35101 56.51039

Estimate SII from additive hazards model using age as time-scale

library(timereg)

fit<-aalen(Surv(time=ageEntry,time2=ageExit,event=status)~const(midpoint),
           data=dat,robust=1,n.sim=0,start.time=30)
siires<-fit$gamma
sesii<-sqrt(fit$robvar.gamma)
ResultsHaz<-c(SII=siires,SE=sesii,lower95=siires-1.96*sesii,upper95=siires+1.96*sesii)

Now Estimate SII from additive Poisson model

library(survival)
dat$ageGroup2<-tcut(dat$ageEntry, c(30,35,40,45,50,55,60,65,70,75,80,85), labels = 
              c("30+ - 35","35+ - 40","40+ - 45","45+ - 50","50+ - 55","55+ - 60",
                "60+ - 65","65+ - 70","70+-75","75+-80","80+-85"))
py<-pyears(Surv(time=time,event=status)~socgroup+ageGroup2, data=dat,data.frame=T,scale=1)
datAGG<-py$data[1:28,]
datAGG$midpoint<-NULL
dismid<-unique(dat$midpoint)
dismid<-dismid[order(dismid)]
for(g in 1:4)datAGG$midpoint[datAGG$socgroup==g]<-dismid[g]  

Use glm function with option

fit<-glm(event~pyears+pyears:ageGroup2+I(pyears*midpoint)-1,data=datAGG, 
    family=quasipoisson(link="identity"))
df.r <- fit$df.residual
disp<-sum((fit$weights * fit$residuals^2)[fit$weights > 0])/df.r
if(disp<1) fit<-glm(event~pyears+pyears:ageGroup2+I(pyears*midpoint)-1,data=datAGG,family=poisson(link="identity"))

siires<-summary(fit)$coefficients["I(pyears * midpoint)",1]
sesii<-summary(fit)$coefficients["I(pyears * midpoint)",2]

ResultsIncid<-c(SII=siires,SE=sesii,lower95=siires-1.96*sesii,upper95=siires+1.96*sesii)

Hazard-based estimates more precise, as expected

rbind(ResultsHaz,ResultsIncid)
##                     SII         SE    lower95    upper95
## ResultsHaz   0.05486944 0.01284961 0.02968419 0.08005468
## ResultsIncid 0.04865372 0.01343674 0.02231770 0.07498974

3) Estimate age-standardized SII from additive hazards model

Reference population weights (European IARC 1976 population)

ageRef<-c(0.152173913,0.152173913,0.152173913,0.152173913,0.152173913,0.130434783,0.108695652)

Fit a different model within each age-group using age as time-scale to get age-specific SIIs, then combine using weights from reference population into final For age-standardized estimate boot function can be used obtain final estimate, robust SE and robust CIs

library(timereg)
library(boot)
## 
## Attaching package: 'boot'
## The following object is masked from 'package:msm':
## 
##     cav
## The following object is masked from 'package:survival':
## 
##     aml
dat$ageExit<-dat$ageEntry+dat$time
starts<-c(30,35,40,45,50,55,60)
dat$ageGroup<-cut(dat$ageEntry, c(30,35,40,45,50,55,60,65), labels = 
                    c("30+ - 35","35+ - 40","40+ - 45","45+ - 50",
                      "50+ - 55","55+ - 60","60+ - 65"))
stat<-function(datc,ind)
{ datb<-datc[ind,]
  siiboot<-vector()
  for(i in 1:length(levels(dat$ageGroup)))
  { fit<-aalen(Surv(time=ageEntry,time2=ageExit,event=status)~const(midpoint),
               data=datb[datb$ageGroup==levels(datb$ageGroup)[i],],
               robust=0,n.sim=0,start.time=starts[i])
    siiboot<-c(siiboot,fit$gamma)
  }
  return(sum(ageRef*siiboot))}

bstrap<-boot(data=dat, statistic=stat, stype="i", R=100) #R= number of bootstrap repetitions
                                                         # ideally one should do 1000 bootstraps
btci<-boot.ci(bstrap,type=c("perc"))$percent[4:5]        #yields CIs based on percentiles 
sesii<-apply(bstrap$t,2,sd,na.rm=T)
ResultsHazAge<-c(SII=bstrap$t0,SE=sesii,lower95=btci[1],upper95=btci[2])  

Age-standardized SII from additive Poisson model

Person-year calculations by age-group and socioeconomic group with pyears function from survival package

library(survival)
dat$ageGroup2<-tcut(dat$ageEntry, c(30,35,40,45,50,55,60,65,70,75,80,85), labels = 
                      c("30+ - 35","35+ - 40","40+ - 45","45+ - 50","50+ - 55","55+ - 60",
                        "60+ - 65","65+ - 70","70+-75","75+-80","80+-85"))
py<-pyears(Surv(time=time,event=status)~socgroup+ageGroup2, data=dat,data.frame=T,scale=1)
datAGG<-py$data[1:28,]
datAGG$midpoint<-NULL
dismid<-unique(dat$midpoint)
dismid<-dismid[order(dismid)]
for(g in 1:4)datAGG$midpoint[datAGG$socgroup==g]<-dismid[g]  

# Include interaction between age-group and socioeconomic rank (premultiplied by pyears)
# to obtain age-specific SIIs, then combine using weights from reference population into final
# age-standardized estimate.
# Use delta-method to obtain SEs
fit<-glm(event~pyears+pyears:ageGroup2+I(pyears*midpoint):ageGroup2-1,data=datAGG, 
         family=quasipoisson(link="identity"))
df.r <- fit$df.residual
disp<-sum((fit$weights * fit$residuals^2)[fit$weights > 0])/df.r
if(disp<1) fit<-glm(event~pyears+pyears:ageGroup2+I(pyears*midpoint):ageGroup2-1,data=datAGG, 
                    family=poisson(link="identity"))
cvm<-vcov(fit)[8:14,8:14]
siibyage<-summary(fit)$coefficients[8:14,1]
library(msm)
sesii<-deltamethod(~0.152173913*x1+0.152173913*x2+0.152173913*x3+0.152173913*x4+
                     0.152173913*x5+0.130434783*x6+0.108695652*x7,siibyage,cvm)  
ResultsIncidAge<-c(SII=sum(ageRef*siibyage),SE=sesii,
                   lower95=sum(ageRef*siibyage)-1.96*sesii,
                   upper95=sum(ageRef*siibyage)+1.96*sesii)      

Hazard-based estimates more precise, as expected

rbind(ResultsHazAge,ResultsIncidAge)
##                        SII         SE    lower95    upper95
## ResultsHazAge   0.05084167 0.01238955 0.02694456 0.07455388
## ResultsIncidAge 0.05258353 0.01531166 0.02257267 0.08259439