This document makes an easier access to the supplementary material of the article entitled Relative Index of Inequality and Slope Index of Inequality: A Structured Regression Framework for Estimation Available Here
First get dataRII.txt and dataSII.txt from these links.
You do not need a dropbox account to download theses files !
Please contact either Margarita Moreno (margarita.moreno at mcri.edu.au) or me (aurelien.latouche at cnam.fr).
Data generated from Cox model with Weibull baseline hazard, including age and the socioeconomic rank x as predictors, with RII=3. Uniform censoring (27%) was superimposed.
dat<-read.table("dataRII.txt",dec=".",sep=";",header=T)
Variables in database:
ID: a unique identification number for each individual
time: time to event or censoring
status: 1=experienced event, 0=censored
socgroup: socioeconomic group, categories 1 to 4, with 1=highest level
x: socioeconomic rank (usually unobserved)
midpoint: group-specific socioeconomic rank, used to approximate x in regression (x_k)
ageEntry: age of entry in the study (in years)
head(dat)
## ID time status socgroup x midpoint ageEntry
## 1 1 5.91796 1 1 0.0007726050 0.05 61.40943
## 2 2 18.84545 0 1 0.0009642883 0.05 38.57793
## 3 3 15.47689 1 1 0.0018605184 0.05 63.55081
## 4 4 11.60002 1 1 0.0020955342 0.05 55.50917
## 5 5 16.96284 0 1 0.0031381176 0.05 35.67072
## 6 6 17.53922 0 1 0.0055284084 0.05 41.28230
## ageExit..
## 1 67.3273897429392 \\
## 2 57.4233816505875 \\
## 3 79.0277006776071 \\
## 4 67.1091917095493 \\
## 5 52.6335585908964 \\
## 6 58.8215236843098 \\
Estimate RII from Cox model using age as time-scale : Use coxph function from survival package to fit model. To obtain robust SEs add the term “+cluster(ID)”.
library(survival)
dat$ageExit<-dat$ageEntry+dat$time
fit<-summary(coxph(Surv(time=ageEntry,time2=ageExit,event=status)~midpoint+cluster(ID),data=dat))
logRII<-fit$coefficients[1]
selogrii<-fit$coefficients[4]
library(msm) #for generic delta-method function
serii<-deltamethod(~exp(x1),logRII,selogrii^2)
ResultsHaz<-c(RII=exp(logRII),SE=serii,lower95=fit$conf.int[3],upper95=fit$conf.int[4])
Estimate RII from multiplicative Poisson model
Person-year calculations by age-group and socioeconomic group with pyears function from survival package
Use tcut function to account for the fact that an individual changes age-group during follow-up
dat$ageGroup2<-tcut(dat$ageEntry, c(30,35,40,45,50,55,60,65,70,75,80,85), labels =
c("30+ - 35","35+ - 40","40+ - 45","45+ - 50","50+ - 55","55+ - 60",
"60+ - 65","65+ - 70","70+-75","75+-80","80+-85"))
py<-pyears(Surv(time=time,event=status)~socgroup+ageGroup2, data=dat,data.frame=T,scale=1)
datAGG<-py$data[1:28,]
datAGG$midpoint<-NULL
dismid<-unique(dat$midpoint)
dismid<-dismid[order(dismid)]
for(g in 1:4)datAGG$midpoint[datAGG$socgroup==g]<-dismid[g]
Use glm function with option “family=quasipoisson(link=”log“)” to estimate overdispersion
if dispersion parameter (disp) <1 then use “family=poisson(link=”log“)”
instead to fix dispersion parameter at 1.
Model formula should include log(pyears) as offset
fit<-glm(event~offset(log(pyears))+midpoint+ageGroup2,data=datAGG,family=quasipoisson(link="log") )
df.r <- fit$df.residual
disp<-sum((fit$weights * fit$residuals^2)[fit$weights > 0])/df.r
if(disp<1) fit<-glm(event~offset(log(pyears))+midpoint+ageGroup2,data=datAGG,
family=poisson(link="log") )
logRII<-summary(fit)$coefficients["midpoint",1]
selogrii<-summary(fit)$coefficients["midpoint",2]
serii<-deltamethod(~exp(x1),logRII,selogrii^2)
ResultsIncid<-c(RII=exp(logRII),SE=serii,
lower95=exp(logRII-1.96*selogrii),
upper95=exp(logRII+1.96*selogrii))
# Hazard-based estimates more precise, as expected
rbind(ResultsHaz,ResultsIncid)
## RII SE lower95 upper95
## ResultsHaz 3.024354 0.3988552 2.335475 3.916428
## ResultsIncid 2.914453 0.4820819 2.107447 4.030487
Data generated from an additive hazards model with Weibull baseline hazard, including age and the socioeconomic rank x as predictors, with SII=0.05.
Uniform censoring (18%) was superimposed.
dat<-read.table("dataSII.txt",dec=".",sep=";",header=T)
Same Variables in the previous database:
head(dat)
## ID time status socgroup x midpoint ageEntry ageExit
## 1 1 12.780117 1 1 0.0002800293 0.05 34.88250 47.66262
## 2 2 14.784095 1 1 0.0003628294 0.05 37.00129 51.78538
## 3 3 11.629896 1 1 0.0026183705 0.05 34.13205 45.76195
## 4 4 5.769812 1 1 0.0106292281 0.05 42.06108 47.83089
## 5 5 2.268189 1 1 0.0107400702 0.05 48.54476 50.81295
## 6 6 13.159378 1 1 0.0138846786 0.05 43.35101 56.51039
Estimate SII from additive hazards model using age as time-scale
library(timereg)
fit<-aalen(Surv(time=ageEntry,time2=ageExit,event=status)~const(midpoint),
data=dat,robust=1,n.sim=0,start.time=30)
siires<-fit$gamma
sesii<-sqrt(fit$robvar.gamma)
ResultsHaz<-c(SII=siires,SE=sesii,lower95=siires-1.96*sesii,upper95=siires+1.96*sesii)
Now Estimate SII from additive Poisson model
Person-year calculations by age-group and socioeconomic group with pyears function from survival package
Use tcut function to account for the fact that an individual changes age-group during follow-up
library(survival)
dat$ageGroup2<-tcut(dat$ageEntry, c(30,35,40,45,50,55,60,65,70,75,80,85), labels =
c("30+ - 35","35+ - 40","40+ - 45","45+ - 50","50+ - 55","55+ - 60",
"60+ - 65","65+ - 70","70+-75","75+-80","80+-85"))
py<-pyears(Surv(time=time,event=status)~socgroup+ageGroup2, data=dat,data.frame=T,scale=1)
datAGG<-py$data[1:28,]
datAGG$midpoint<-NULL
dismid<-unique(dat$midpoint)
dismid<-dismid[order(dismid)]
for(g in 1:4)datAGG$midpoint[datAGG$socgroup==g]<-dismid[g]
Use glm function with option
“family=quasipoisson(link=”identity“)” to estimate overdispersion
instead fix dispersion parameter at 1.
Model formula should exclude intercept (-1), include pyears as predictor and premultiply all other predictors by pyears
fit<-glm(event~pyears+pyears:ageGroup2+I(pyears*midpoint)-1,data=datAGG,
family=quasipoisson(link="identity"))
df.r <- fit$df.residual
disp<-sum((fit$weights * fit$residuals^2)[fit$weights > 0])/df.r
if(disp<1) fit<-glm(event~pyears+pyears:ageGroup2+I(pyears*midpoint)-1,data=datAGG,family=poisson(link="identity"))
siires<-summary(fit)$coefficients["I(pyears * midpoint)",1]
sesii<-summary(fit)$coefficients["I(pyears * midpoint)",2]
ResultsIncid<-c(SII=siires,SE=sesii,lower95=siires-1.96*sesii,upper95=siires+1.96*sesii)
Hazard-based estimates more precise, as expected
rbind(ResultsHaz,ResultsIncid)
## SII SE lower95 upper95
## ResultsHaz 0.05486944 0.01284961 0.02968419 0.08005468
## ResultsIncid 0.04865372 0.01343674 0.02231770 0.07498974
Reference population weights (European IARC 1976 population)
ageRef<-c(0.152173913,0.152173913,0.152173913,0.152173913,0.152173913,0.130434783,0.108695652)
Fit a different model within each age-group using age as time-scale to get age-specific SIIs, then combine using weights from reference population into final For age-standardized estimate boot function can be used obtain final estimate, robust SE and robust CIs
library(timereg)
library(boot)
##
## Attaching package: 'boot'
## The following object is masked from 'package:msm':
##
## cav
## The following object is masked from 'package:survival':
##
## aml
dat$ageExit<-dat$ageEntry+dat$time
starts<-c(30,35,40,45,50,55,60)
dat$ageGroup<-cut(dat$ageEntry, c(30,35,40,45,50,55,60,65), labels =
c("30+ - 35","35+ - 40","40+ - 45","45+ - 50",
"50+ - 55","55+ - 60","60+ - 65"))
stat<-function(datc,ind)
{ datb<-datc[ind,]
siiboot<-vector()
for(i in 1:length(levels(dat$ageGroup)))
{ fit<-aalen(Surv(time=ageEntry,time2=ageExit,event=status)~const(midpoint),
data=datb[datb$ageGroup==levels(datb$ageGroup)[i],],
robust=0,n.sim=0,start.time=starts[i])
siiboot<-c(siiboot,fit$gamma)
}
return(sum(ageRef*siiboot))}
bstrap<-boot(data=dat, statistic=stat, stype="i", R=100) #R= number of bootstrap repetitions
# ideally one should do 1000 bootstraps
btci<-boot.ci(bstrap,type=c("perc"))$percent[4:5] #yields CIs based on percentiles
sesii<-apply(bstrap$t,2,sd,na.rm=T)
ResultsHazAge<-c(SII=bstrap$t0,SE=sesii,lower95=btci[1],upper95=btci[2])
Person-year calculations by age-group and socioeconomic group with pyears function from survival package
library(survival)
dat$ageGroup2<-tcut(dat$ageEntry, c(30,35,40,45,50,55,60,65,70,75,80,85), labels =
c("30+ - 35","35+ - 40","40+ - 45","45+ - 50","50+ - 55","55+ - 60",
"60+ - 65","65+ - 70","70+-75","75+-80","80+-85"))
py<-pyears(Surv(time=time,event=status)~socgroup+ageGroup2, data=dat,data.frame=T,scale=1)
datAGG<-py$data[1:28,]
datAGG$midpoint<-NULL
dismid<-unique(dat$midpoint)
dismid<-dismid[order(dismid)]
for(g in 1:4)datAGG$midpoint[datAGG$socgroup==g]<-dismid[g]
# Include interaction between age-group and socioeconomic rank (premultiplied by pyears)
# to obtain age-specific SIIs, then combine using weights from reference population into final
# age-standardized estimate.
# Use delta-method to obtain SEs
fit<-glm(event~pyears+pyears:ageGroup2+I(pyears*midpoint):ageGroup2-1,data=datAGG,
family=quasipoisson(link="identity"))
df.r <- fit$df.residual
disp<-sum((fit$weights * fit$residuals^2)[fit$weights > 0])/df.r
if(disp<1) fit<-glm(event~pyears+pyears:ageGroup2+I(pyears*midpoint):ageGroup2-1,data=datAGG,
family=poisson(link="identity"))
cvm<-vcov(fit)[8:14,8:14]
siibyage<-summary(fit)$coefficients[8:14,1]
library(msm)
sesii<-deltamethod(~0.152173913*x1+0.152173913*x2+0.152173913*x3+0.152173913*x4+
0.152173913*x5+0.130434783*x6+0.108695652*x7,siibyage,cvm)
ResultsIncidAge<-c(SII=sum(ageRef*siibyage),SE=sesii,
lower95=sum(ageRef*siibyage)-1.96*sesii,
upper95=sum(ageRef*siibyage)+1.96*sesii)
Hazard-based estimates more precise, as expected
rbind(ResultsHazAge,ResultsIncidAge)
## SII SE lower95 upper95
## ResultsHazAge 0.05084167 0.01238955 0.02694456 0.07455388
## ResultsIncidAge 0.05258353 0.01531166 0.02257267 0.08259439