This document makes an easier access to the supplementary material of the article entitled Relative Index of Inequality and Slope Index of Inequality: A Structured Regression Framework for Estimation Available Here

First get dataRII.txt and dataSII.txt from these links.

You do not need a dropbox account to download theses files !

Please contact either Margarita Moreno (margarita.moreno at mcri.edu.au) or me (aurelien.latouche at cnam.fr).

1) Estimation of RII

Data generated from Cox model with Weibull baseline hazard, including age and the socioeconomic rank x as predictors, with RII=3. Uniform censoring (27%) was superimposed.

dat<-read.table("dataRII.txt",dec=".",sep=";",header=T)

Variables in database:

ID: a unique identification number for each individual
time: time to event or censoring
status: 1=experienced event, 0=censored
socgroup: socioeconomic group, categories 1 to 4, with 1=highest level
x: socioeconomic rank (usually unobserved)
midpoint: group-specific socioeconomic rank, used to approximate x in regression (x_k)
ageEntry: age of entry in the study (in years)

head(dat)

##   ID     time status socgroup            x midpoint ageEntry
## 1  1  5.91796      1        1 0.0007726050     0.05 61.40943
## 2  2 18.84545      0        1 0.0009642883     0.05 38.57793
## 3  3 15.47689      1        1 0.0018605184     0.05 63.55081
## 4  4 11.60002      1        1 0.0020955342     0.05 55.50917
## 5  5 16.96284      0        1 0.0031381176     0.05 35.67072
## 6  6 17.53922      0        1 0.0055284084     0.05 41.28230
##             ageExit..
## 1 67.3273897429392 \\
## 2 57.4233816505875 \\
## 3 79.0277006776071 \\
## 4 67.1091917095493 \\
## 5 52.6335585908964 \\
## 6 58.8215236843098 \\

Estimate RII from Cox model using age as time-scale : Use coxph function from survival package to fit model. To obtain robust SEs add the term “+cluster(ID)”.

library(survival)
dat$ageExit<-dat$ageEntry+dat$time
fit<-summary(coxph(Surv(time=ageEntry,time2=ageExit,event=status)~midpoint+cluster(ID),data=dat))
logRII<-fit$coefficients[1]
selogrii<-fit$coefficients[4]
library(msm) #for generic delta-method function
serii<-deltamethod(~exp(x1),logRII,selogrii^2)
ResultsHaz<-c(RII=exp(logRII),SE=serii,lower95=fit$conf.int[3],upper95=fit$conf.int[4])

Estimate RII from multiplicative Poisson model

Person-year calculations by age-group and socioeconomic group with pyears function from survival package
Use tcut function to account for the fact that an individual changes age-group during follow-up

dat$ageGroup2<-tcut(dat$ageEntry, c(30,35,40,45,50,55,60,65,70,75,80,85), labels = 
                  c("30+ - 35","35+ - 40","40+ - 45","45+ - 50","50+ - 55","55+ - 60",
                    "60+ - 65","65+ - 70","70+-75","75+-80","80+-85"))
py<-pyears(Surv(time=time,event=status)~socgroup+ageGroup2, data=dat,data.frame=T,scale=1)
datAGG<-py$data[1:28,]
datAGG$midpoint<-NULL
dismid<-unique(dat$midpoint)
dismid<-dismid[order(dismid)]
for(g in 1:4)datAGG$midpoint[datAGG$socgroup==g]<-dismid[g]

Use glm function with option “family=quasipoisson(link=”log“)” to estimate overdispersion

if dispersion parameter (disp) <1 then use “family=poisson(link=”log“)”
instead to fix dispersion parameter at 1.
Model formula should include log(pyears) as offset

fit<-glm(event~offset(log(pyears))+midpoint+ageGroup2,data=datAGG,family=quasipoisson(link="log") )
df.r <- fit$df.residual
disp<-sum((fit$weights * fit$residuals^2)[fit$weights > 0])/df.r
if(disp<1) fit<-glm(event~offset(log(pyears))+midpoint+ageGroup2,data=datAGG,
                    family=poisson(link="log") )

logRII<-summary(fit)$coefficients["midpoint",1]
selogrii<-summary(fit)$coefficients["midpoint",2]
serii<-deltamethod(~exp(x1),logRII,selogrii^2)
ResultsIncid<-c(RII=exp(logRII),SE=serii,
                lower95=exp(logRII-1.96*selogrii),
                upper95=exp(logRII+1.96*selogrii))

# Hazard-based estimates more precise, as expected
rbind(ResultsHaz,ResultsIncid)

##                   RII        SE  lower95  upper95
## ResultsHaz   3.024354 0.3988552 2.335475 3.916428
## ResultsIncid 2.914453 0.4820819 2.107447 4.030487

2) Estimation of SII

Data generated from an additive hazards model with Weibull baseline hazard, including age and the socioeconomic rank x as predictors, with SII=0.05.

Uniform censoring (18%) was superimposed.

dat<-read.table("dataSII.txt",dec=".",sep=";",header=T)

Same Variables in the previous database:

head(dat)

##   ID      time status socgroup            x midpoint ageEntry  ageExit
## 1  1 12.780117      1        1 0.0002800293     0.05 34.88250 47.66262
## 2  2 14.784095      1        1 0.0003628294     0.05 37.00129 51.78538
## 3  3 11.629896      1        1 0.0026183705     0.05 34.13205 45.76195
## 4  4  5.769812      1        1 0.0106292281     0.05 42.06108 47.83089
## 5  5  2.268189      1        1 0.0107400702     0.05 48.54476 50.81295
## 6  6 13.159378      1        1 0.0138846786     0.05 43.35101 56.51039

Estimate SII from additive hazards model using age as time-scale

Use aalen function from timereg package to fit model
the option “robust=1” yields robust SEs

library(timereg)

fit<-aalen(Surv(time=ageEntry,time2=ageExit,event=status)~const(midpoint),
           data=dat,robust=1,n.sim=0,start.time=30)
siires<-fit$gamma
sesii<-sqrt(fit$robvar.gamma)
ResultsHaz<-c(SII=siires,SE=sesii,lower95=siires-1.96*sesii,upper95=siires+1.96*sesii)

Now Estimate SII from additive Poisson model

Person-year calculations by age-group and socioeconomic group with pyears function from survival package
Use tcut function to account for the fact that an individual changes age-group during follow-up

library(survival)
dat$ageGroup2<-tcut(dat$ageEntry, c(30,35,40,45,50,55,60,65,70,75,80,85), labels = 
              c("30+ - 35","35+ - 40","40+ - 45","45+ - 50","50+ - 55","55+ - 60",
                "60+ - 65","65+ - 70","70+-75","75+-80","80+-85"))
py<-pyears(Surv(time=time,event=status)~socgroup+ageGroup2, data=dat,data.frame=T,scale=1)
datAGG<-py$data[1:28,]
datAGG$midpoint<-NULL
dismid<-unique(dat$midpoint)
dismid<-dismid[order(dismid)]
for(g in 1:4)datAGG$midpoint[datAGG$socgroup==g]<-dismid[g]

Use glm function with option

“family=quasipoisson(link=”identity“)” to estimate overdispersion
if dispersion parameter (disp) <1 then use “family=poisson(link=”identity“)”
instead fix dispersion parameter at 1.
Model formula should exclude intercept (-1), include pyears as predictor and premultiply all other predictors by pyears

fit<-glm(event~pyears+pyears:ageGroup2+I(pyears*midpoint)-1,data=datAGG, 
    family=quasipoisson(link="identity"))
df.r <- fit$df.residual
disp<-sum((fit$weights * fit$residuals^2)[fit$weights > 0])/df.r
if(disp<1) fit<-glm(event~pyears+pyears:ageGroup2+I(pyears*midpoint)-1,data=datAGG,family=poisson(link="identity"))

siires<-summary(fit)$coefficients["I(pyears * midpoint)",1]
sesii<-summary(fit)$coefficients["I(pyears * midpoint)",2]

ResultsIncid<-c(SII=siires,SE=sesii,lower95=siires-1.96*sesii,upper95=siires+1.96*sesii)

Hazard-based estimates more precise, as expected

rbind(ResultsHaz,ResultsIncid)

##                     SII         SE    lower95    upper95
## ResultsHaz   0.05486944 0.01284961 0.02968419 0.08005468
## ResultsIncid 0.04865372 0.01343674 0.02231770 0.07498974

3) Estimate age-standardized SII from additive hazards model

Reference population weights (European IARC 1976 population)

ageRef<-c(0.152173913,0.152173913,0.152173913,0.152173913,0.152173913,0.130434783,0.108695652)

Fit a different model within each age-group using age as time-scale to get age-specific SIIs, then combine using weights from reference population into final For age-standardized estimate boot function can be used obtain final estimate, robust SE and robust CIs

library(timereg)
library(boot)

## 
## Attaching package: 'boot'

## The following object is masked from 'package:msm':
## 
##     cav

## The following object is masked from 'package:survival':
## 
##     aml

dat$ageExit<-dat$ageEntry+dat$time
starts<-c(30,35,40,45,50,55,60)
dat$ageGroup<-cut(dat$ageEntry, c(30,35,40,45,50,55,60,65), labels = 
                    c("30+ - 35","35+ - 40","40+ - 45","45+ - 50",
                      "50+ - 55","55+ - 60","60+ - 65"))
stat<-function(datc,ind)
{ datb<-datc[ind,]
  siiboot<-vector()
  for(i in 1:length(levels(dat$ageGroup)))
  { fit<-aalen(Surv(time=ageEntry,time2=ageExit,event=status)~const(midpoint),
               data=datb[datb$ageGroup==levels(datb$ageGroup)[i],],
               robust=0,n.sim=0,start.time=starts[i])
    siiboot<-c(siiboot,fit$gamma)
  }
  return(sum(ageRef*siiboot))}

bstrap<-boot(data=dat, statistic=stat, stype="i", R=100) #R= number of bootstrap repetitions
                                                         # ideally one should do 1000 bootstraps
btci<-boot.ci(bstrap,type=c("perc"))$percent[4:5]        #yields CIs based on percentiles 
sesii<-apply(bstrap$t,2,sd,na.rm=T)
ResultsHazAge<-c(SII=bstrap$t0,SE=sesii,lower95=btci[1],upper95=btci[2])

Age-standardized SII from additive Poisson model

Person-year calculations by age-group and socioeconomic group with pyears function from survival package

Use tcut function to account for the fact that an individual changes age-group during follow-up

library(survival)
dat$ageGroup2<-tcut(dat$ageEntry, c(30,35,40,45,50,55,60,65,70,75,80,85), labels = 
                      c("30+ - 35","35+ - 40","40+ - 45","45+ - 50","50+ - 55","55+ - 60",
                        "60+ - 65","65+ - 70","70+-75","75+-80","80+-85"))
py<-pyears(Surv(time=time,event=status)~socgroup+ageGroup2, data=dat,data.frame=T,scale=1)
datAGG<-py$data[1:28,]
datAGG$midpoint<-NULL
dismid<-unique(dat$midpoint)
dismid<-dismid[order(dismid)]
for(g in 1:4)datAGG$midpoint[datAGG$socgroup==g]<-dismid[g]  

# Include interaction between age-group and socioeconomic rank (premultiplied by pyears)
# to obtain age-specific SIIs, then combine using weights from reference population into final
# age-standardized estimate.
# Use delta-method to obtain SEs
fit<-glm(event~pyears+pyears:ageGroup2+I(pyears*midpoint):ageGroup2-1,data=datAGG, 
         family=quasipoisson(link="identity"))
df.r <- fit$df.residual
disp<-sum((fit$weights * fit$residuals^2)[fit$weights > 0])/df.r
if(disp<1) fit<-glm(event~pyears+pyears:ageGroup2+I(pyears*midpoint):ageGroup2-1,data=datAGG, 
                    family=poisson(link="identity"))
cvm<-vcov(fit)[8:14,8:14]
siibyage<-summary(fit)$coefficients[8:14,1]
library(msm)
sesii<-deltamethod(~0.152173913*x1+0.152173913*x2+0.152173913*x3+0.152173913*x4+
                     0.152173913*x5+0.130434783*x6+0.108695652*x7,siibyage,cvm)  
ResultsIncidAge<-c(SII=sum(ageRef*siibyage),SE=sesii,
                   lower95=sum(ageRef*siibyage)-1.96*sesii,
                   upper95=sum(ageRef*siibyage)+1.96*sesii)

Hazard-based estimates more precise, as expected

rbind(ResultsHazAge,ResultsIncidAge)

##                        SII         SE    lower95    upper95
## ResultsHazAge   0.05084167 0.01238955 0.02694456 0.07455388
## ResultsIncidAge 0.05258353 0.01531166 0.02257267 0.08259439

Webappendix For Moreno-Betancur et al. 2015 Epidemiology

A. Latouche

September 2016

1) Estimation of RII

2) Estimation of SII

3) Estimate age-standardized SII from additive hazards model

Age-standardized SII from additive Poisson model