This document makes an easier access to the supplementary material of the article entitled Relative Index of Inequality and Slope Index of Inequality: A Structured Regression Framework for Estimation Available Here
First get dataRII.txt and dataSII.txt from these links.
Please contact either Margarita Moreno (margarita.moreno at mcri.edu.au) or me (aurelien.latouche at cnam.fr).
Data generated from Cox model with Weibull baseline hazard, including age and the socioeconomic rank x as predictors, with RII=3. Uniform censoring (27%) was superimposed.
dat<-read.table("dataRII.txt",dec=".",sep=";",header=T)
Variables in database:
ID: a unique identification number for each individual
time: time to event or censoring
status: 1=experienced event, 0=censored
socgroup: socioeconomic group, categories 1 to 4, with 1=highest level
x: socioeconomic rank (usually unobserved)
midpoint: group-specific socioeconomic rank, used to approximate x in regression (x_k)
ageEntry: age of entry in the study (in years)
head(dat)
## ID time status socgroup x midpoint ageEntry
## 1 1 5.91796 1 1 0.0007726050 0.05 61.40943
## 2 2 18.84545 0 1 0.0009642883 0.05 38.57793
## 3 3 15.47689 1 1 0.0018605184 0.05 63.55081
## 4 4 11.60002 1 1 0.0020955342 0.05 55.50917
## 5 5 16.96284 0 1 0.0031381176 0.05 35.67072
## 6 6 17.53922 0 1 0.0055284084 0.05 41.28230
## ageExit..
## 1 67.3273897429392 \\
## 2 57.4233816505875 \\
## 3 79.0277006776071 \\
## 4 67.1091917095493 \\
## 5 52.6335585908964 \\
## 6 58.8215236843098 \\
Estimate RII from Cox model using age as time-scale : Use coxph function from survival package to fit model. To obtain robust SEs add the term “+cluster(ID)”.
library(survival)
dat$ageExit<-dat$ageEntry+dat$time
fit<-summary(coxph(Surv(time=ageEntry,time2=ageExit,event=status)~midpoint+cluster(ID),data=dat))
logRII<-fit$coefficients[1]
selogrii<-fit$coefficients[4]
library(msm) #for generic delta-method function
serii<-deltamethod(~exp(x1),logRII,selogrii^2)
ResultsHaz<-c(RII=exp(logRII),SE=serii,lower95=fit$conf.int[3],upper95=fit$conf.int[4])
Estimate RII from multiplicative Poisson model
Person-year calculations by age-group and socioeconomic group with pyears function from survival package
Use tcut function to account for the fact that an individual changes age-group during follow-up
dat$ageGroup2<-tcut(dat$ageEntry, c(30,35,40,45,50,55,60,65,70,75,80,85), labels =
c("30+ - 35","35+ - 40","40+ - 45","45+ - 50","50+ - 55","55+ - 60",
"60+ - 65","65+ - 70","70+-75","75+-80","80+-85"))
py<-pyears(Surv(time=time,event=status)~socgroup+ageGroup2, data=dat,data.frame=T,scale=1)
datAGG<-py$data[1:28,]
datAGG$midpoint<-NULL
dismid<-unique(dat$midpoint)
dismid<-dismid[order(dismid)]
for(g in 1:4)datAGG$midpoint[datAGG$socgroup==g]<-dismid[g]
Use glm function with option “family=quasipoisson(link=”log“)” to estimate overdispersion
if dispersion parameter (disp) <1 then use “family=poisson(link=”log“)”
instead to fix dispersion parameter at 1.
Model formula should include log(pyears) as offset
fit<-glm(event~offset(log(pyears))+midpoint+ageGroup2,data=datAGG,family=quasipoisson(link="log") )
df.r <- fit$df.residual
disp<-sum((fit$weights * fit$residuals^2)[fit$weights > 0])/df.r
if(disp<1) fit<-glm(event~offset(log(pyears))+midpoint+ageGroup2,data=datAGG,
family=poisson(link="log") )
logRII<-summary(fit)$coefficients["midpoint",1]
selogrii<-summary(fit)$coefficients["midpoint",2]
serii<-deltamethod(~exp(x1),logRII,selogrii^2)
ResultsIncid<-c(RII=exp(logRII),SE=serii,
lower95=exp(logRII-1.96*selogrii),
upper95=exp(logRII+1.96*selogrii))
# Hazard-based estimates more precise, as expected
rbind(ResultsHaz,ResultsIncid)
## RII SE lower95 upper95
## ResultsHaz 3.024354 0.3988552 2.335475 3.916428
## ResultsIncid 2.914453 0.4820819 2.107447 4.030487
Data generated from an additive hazards model with Weibull baseline hazard, including age and the socioeconomic rank x as predictors, with SII=0.05.
Uniform censoring (18%) was superimposed.
dat<-read.table("dataSII.txt",dec=".",sep=";",header=T)
Same Variables in the previous database:
head(dat)
## ID time status socgroup x midpoint ageEntry ageExit
## 1 1 12.780117 1 1 0.0002800293 0.05 34.88250 47.66262
## 2 2 14.784095 1 1 0.0003628294 0.05 37.00129 51.78538
## 3 3 11.629896 1 1 0.0026183705 0.05 34.13205 45.76195
## 4 4 5.769812 1 1 0.0106292281 0.05 42.06108 47.83089
## 5 5 2.268189 1 1 0.0107400702 0.05 48.54476 50.81295
## 6 6 13.159378 1 1 0.0138846786 0.05 43.35101 56.51039
Estimate SII from additive hazards model using age as time-scale
library(timereg)
fit<-aalen(Surv(time=ageEntry,time2=ageExit,event=status)~const(midpoint),
data=dat,robust=1,n.sim=0,start.time=30)
siires<-fit$gamma
sesii<-sqrt(fit$robvar.gamma)
ResultsHaz<-c(SII=siires,SE=sesii,lower95=siires-1.96*sesii,upper95=siires+1.96*sesii)
Now Estimate SII from additive Poisson model
Person-year calculations by age-group and socioeconomic group with pyears function from survival package
Use tcut function to account for the fact that an individual changes age-group during follow-up
library(survival)
dat$ageGroup2<-tcut(dat$ageEntry, c(30,35,40,45,50,55,60,65,70,75,80,85), labels =
c("30+ - 35","35+ - 40","40+ - 45","45+ - 50","50+ - 55","55+ - 60",
"60+ - 65","65+ - 70","70+-75","75+-80","80+-85"))
py<-pyears(Surv(time=time,event=status)~socgroup+ageGroup2, data=dat,data.frame=T,scale=1)
datAGG<-py$data[1:28,]
datAGG$midpoint<-NULL
dismid<-unique(dat$midpoint)
dismid<-dismid[order(dismid)]
for(g in 1:4)datAGG$midpoint[datAGG$socgroup==g]<-dismid[g]
Use glm function with option
“family=quasipoisson(link=”identity“)” to estimate overdispersion
instead fix dispersion parameter at 1.
Model formula should exclude intercept (-1), include pyears as predictor and premultiply all other predictors by pyears
fit<-glm(event~pyears+pyears:ageGroup2+I(pyears*midpoint)-1,data=datAGG,
family=quasipoisson(link="identity"))
df.r <- fit$df.residual
disp<-sum((fit$weights * fit$residuals^2)[fit$weights > 0])/df.r
if(disp<1) fit<-glm(event~pyears+pyears:ageGroup2+I(pyears*midpoint)-1,data=datAGG,family=poisson(link="identity"))
siires<-summary(fit)$coefficients["I(pyears * midpoint)",1]
sesii<-summary(fit)$coefficients["I(pyears * midpoint)",2]
ResultsIncid<-c(SII=siires,SE=sesii,lower95=siires-1.96*sesii,upper95=siires+1.96*sesii)
Hazard-based estimates more precise, as expected
rbind(ResultsHaz,ResultsIncid)
## SII SE lower95 upper95
## ResultsHaz 0.05486944 0.01284961 0.02968419 0.08005468
## ResultsIncid 0.04865372 0.01343674 0.02231770 0.07498974
Reference population weights (European IARC 1976 population)
ageRef<-c(0.152173913,0.152173913,0.152173913,0.152173913,0.152173913,0.130434783,0.108695652)
Fit a different model within each age-group using age as time-scale to get age-specific SIIs, then combine using weights from reference population into final For age-standardized estimate boot function can be used obtain final estimate, robust SE and robust CIs
library(timereg)
library(boot)
##
## Attaching package: 'boot'
## The following object is masked from 'package:msm':
##
## cav
## The following object is masked from 'package:survival':
##
## aml
dat$ageExit<-dat$ageEntry+dat$time
starts<-c(30,35,40,45,50,55,60)
dat$ageGroup<-cut(dat$ageEntry, c(30,35,40,45,50,55,60,65), labels =
c("30+ - 35","35+ - 40","40+ - 45","45+ - 50",
"50+ - 55","55+ - 60","60+ - 65"))
stat<-function(datc,ind)
{ datb<-datc[ind,]
siiboot<-vector()
for(i in 1:length(levels(dat$ageGroup)))
{ fit<-aalen(Surv(time=ageEntry,time2=ageExit,event=status)~const(midpoint),
data=datb[datb$ageGroup==levels(datb$ageGroup)[i],],
robust=0,n.sim=0,start.time=starts[i])
siiboot<-c(siiboot,fit$gamma)
}
return(sum(ageRef*siiboot))}
bstrap<-boot(data=dat, statistic=stat, stype="i", R=100) #R= number of bootstrap repetitions
# ideally one should do 1000 bootstraps
btci<-boot.ci(bstrap,type=c("perc"))$percent[4:5] #yields CIs based on percentiles
sesii<-apply(bstrap$t,2,sd,na.rm=T)
ResultsHazAge<-c(SII=bstrap$t0,SE=sesii,lower95=btci[1],upper95=btci[2])
Person-year calculations by age-group and socioeconomic group with pyears function from survival package
library(survival)
dat$ageGroup2<-tcut(dat$ageEntry, c(30,35,40,45,50,55,60,65,70,75,80,85), labels =
c("30+ - 35","35+ - 40","40+ - 45","45+ - 50","50+ - 55","55+ - 60",
"60+ - 65","65+ - 70","70+-75","75+-80","80+-85"))
py<-pyears(Surv(time=time,event=status)~socgroup+ageGroup2, data=dat,data.frame=T,scale=1)
datAGG<-py$data[1:28,]
datAGG$midpoint<-NULL
dismid<-unique(dat$midpoint)
dismid<-dismid[order(dismid)]
for(g in 1:4)datAGG$midpoint[datAGG$socgroup==g]<-dismid[g]
# Include interaction between age-group and socioeconomic rank (premultiplied by pyears)
# to obtain age-specific SIIs, then combine using weights from reference population into final
# age-standardized estimate.
# Use delta-method to obtain SEs
fit<-glm(event~pyears+pyears:ageGroup2+I(pyears*midpoint):ageGroup2-1,data=datAGG,
family=quasipoisson(link="identity"))
df.r <- fit$df.residual
disp<-sum((fit$weights * fit$residuals^2)[fit$weights > 0])/df.r
if(disp<1) fit<-glm(event~pyears+pyears:ageGroup2+I(pyears*midpoint):ageGroup2-1,data=datAGG,
family=poisson(link="identity"))
cvm<-vcov(fit)[8:14,8:14]
siibyage<-summary(fit)$coefficients[8:14,1]
library(msm)
sesii<-deltamethod(~0.152173913*x1+0.152173913*x2+0.152173913*x3+0.152173913*x4+
0.152173913*x5+0.130434783*x6+0.108695652*x7,siibyage,cvm)
ResultsIncidAge<-c(SII=sum(ageRef*siibyage),SE=sesii,
lower95=sum(ageRef*siibyage)-1.96*sesii,
upper95=sum(ageRef*siibyage)+1.96*sesii)
Hazard-based estimates more precise, as expected
rbind(ResultsHazAge,ResultsIncidAge)
## SII SE lower95 upper95
## ResultsHazAge 0.05084167 0.01258943 0.0322227 0.08196735
## ResultsIncidAge 0.05258353 NA NA NA