This is a document to support our manuscript submission to International Journal of Epidemiology. In this page, we present the code used for estimating ICC values under exchangeable, nested/block exchangeable and exponential decay models for longitudinal cluster randomized trials. The same code can be used to analyzing data from longitudinal cluster randomized trials under different assumed correlation structures.
# Load packages
library(lme4)
library(glmmTMB)
# Load your dataset
data <- write.csv("replace it by the local path to your dataset")
# Convert Period, Cluster and Treatment to factors
# (if they were coded as numerical variables in the dataset)
data[,c("Period", "Cluster", "Treatment")] <-
lapply(data[,c("Period", "Cluster", "Treatment")], factor)
# Fit three models
# Exchangeable model
fit_HH <- lmer(Outcome ~ Period + Treatment + (1|Cluster), data = data)
# Nested exchangeable model
fit_HG <- lmer(Outcome ~ Period + Treatment +(1|Cluster) + (1|Period:Cluster), data = data)
# Exponential decay model
fit_AR <- glmmTMB(Outcome ~ Period + Treatment + ar1(Period + 0 | Cluster), REML=T,
data = data, family = gaussian)
ICC can be calculated manually using the formula we provided in our manuscript or run the following code
# Extract variance component estimates
V1 <- as.data.frame(VarCorr(fit_HH))
V2 <- as.data.frame(VarCorr(fit_HG))
V3 <- VarCorr(fit_AR)
# Estimate within-period ICC for exchangeable model
wpicc_hh <- round(V1[,"vcov"][which(V1[,"grp"] == "Cluster")]/sum(V1[,"vcov"]),4)
# Estimate within-, between-period ICC and cac for nested exchangeable model
wpicc_hg <- round((V2[,"vcov"][which(V2[,"grp"] == "Cluster")] + V2[,"vcov"][which(V2[,"grp"] == "Period:Cluster")])/sum(V2[,"vcov"]),4)
bpicc_hg <- round((V2[,"vcov"][which(V2[,"grp"] == "Cluster")])/sum(V2[,"vcov"]),4)
cac_hg <- round(bpicc_hg/wpicc_hg,4)
# Estimate within-period ICC and decay rate for exponential decay model
wpicc_ar <- round(V3$cond$Cluster[1,1]/sum(V3$cond$Cluster[1,1],(attr(V3$cond,"sc"))^2),4)
ar1 <- round(attr(V3$cond$Cluster,"correlation")[1,2],4)
**Load your data properly in SAS;
**Exchangeable model;
PROC MIXED DATA=data ;
CLASS Cluster Period Treatment;
MODEL Outcome = Period Treatment/ SOLUTION;
RANDOM Cluster;
ods output covparms=HHcovparms fitstatistics=HHfitstatistics solutionf=HHsolutionf;
RUN;
**Nested exchangeable;
PROC MIXED DATA=data;
CLASS Cluster Period Treatment;
MODEL Outcome = Period Treatment/ SOLUTION;
RANDOM Cluster Period*Cluster ;
ods output covparms=NEcovparms fitstatistics=NEfitstatistics solutionf=NEsolutionf;
RUN;
**Exponential decay;
**PROC HPMIXED can be used to improve the computational speed;
PROC HPMIXED DATA=data;
CLASS Cluster Period Treatment;
MODEL Outcome = Period Treatment/ SOLUTION;
RANDOM Period / SUBJECT=Cluster TYPE=AR(1);
ods output covparms=ARcovparms fitstatistics=ARfitstatistics solutionf=ARsolutionf;
RUN;
**Estimate within-period ICC for exchangeable model;
PROC SQL;
CREATE TABLE icchh AS
SELECT sum(estimate*(covparm='Cluster')) / sum(estimate) AS wpicc
FROM HHcovparms;
QUIT;
**Estimate within-, between-period ICC and cac for nested exchangeable model;
PROC SQL;
CREATE TABLE icchg AS
SELECT
sum(estimate*(covparm ^='Residual'))/sum(estimate) AS wpicc,
sum(estimate*(covparm='Cluster')) / sum(estimate) AS bpicc,
sum(estimate*(covparm='Cluster'))/ sum(estimate*(covparm ^='Residual')) AS cac
FROM Necovparms;
QUIT;
**Estimate within-period ICC and decay rate for exponential decay model;
PROC SQL;
CREATE TABLE iccar AS
SELECT
sum(estimate*(covparm='Variance')) / sum(estimate*(covparm ^='AR(1)')) AS wpicc,
sum(estimate*(covparm='AR(1)')) as ar1
FROM ARcovparms;
QUIT;
# Load your dataset
import delimited "replace it by the local path to your dataset", clear
# Exchangeable model
mixed outcome i.period i.treatment_n || cluster:, reml
# Block exchangeable model
mixed outcome i.period i.treatment_n || cluster: || period:, reml
# ICC calculations
. estat icc
# Load packages
library(lme4)
library(glmmTMB)
# Load your dataset
data <- write.csv("replace it by the local path to your dataset")
# Convert Period, Cluster and Treatment to factors
# (if they were coded as numerical variables in the dataset)
data[,c("Period", "Cluster", "ID", "Treatment")] <-
lapply(data[,c("Period", "Cluster", "ID", "Treatment")], factor)
# Exchangeable model
fit_HH <- lmer(Outcome ~ Period + Treatment + (1|Cluster) + (1|ID), data = data)
# Block exchangeable model
fit_HG <- lmer(Outcome ~ Period + Treatment +(1|Cluster) + (1|Period:Cluster) + (1|ID), data = data)
# Exponential decay model
fit_AR <- glmmTMB(Outcome ~ Period + Treatment + ar1(Period + 0 | Cluster) + (1|ID), REML=T, data = data, family = gaussian)
# Extract variance component estimates
V1 <- as.data.frame(VarCorr(fit_HH))
V2 <- as.data.frame(VarCorr(fit_HG))
V3 <- VarCorr(fit_AR)
# Estimate within-period ICC and IAC for exchangeable model
wpicc_hh <- round(V1[,"vcov"][which(V1[,"grp"] == "Cluster")]/sum(V1[,"vcov"]),4)
wiicc_hh <- round(sum(V1[,"vcov"][which(V1[,"grp"] == "Cluster")], V1[,"vcov"][which(V1[,"grp"] == "ID")])/sum(V1[,"vcov"]),4)
# Estimate within-, between-period ICC, IAC and CAC for nested exchangeable model
wpicc_hg <- round((V2[,"vcov"][which(V2[,"grp"] == "Cluster")] + V2[,"vcov"][which(V2[,"grp"] == "Period:Cluster")])/sum(V2[,"vcov"]),4)
bpicc_hg <- round((V2[,"vcov"][which(V2[,"grp"] == "Cluster")])/sum(V2[,"vcov"]),4)
wiicc_hg <- round(sum(V2[,"vcov"][which(V2[,"grp"] == "Cluster")],V2[,"vcov"][which(V2[,"grp"] == "ID")])/sum(V2[,"vcov"]),4)
cac_hg <- round((V2[,"vcov"][which(V2[,"grp"] == "Cluster")])/(V2[,"vcov"][which(V2[,"grp"] == "Cluster")] + V2[,"vcov"][which(V2[,"grp"] == "Period:Cluster")]),4)
# Estimate within-period ICC, IAC and decay rate(CAC) for exponential decay model
wpicc_ar <- round(V3$cond$Cluster[1,1]/((attr(V3$cond$ID,"stddev"))^2+ V3$cond$Cluster[1,1]+(attr(V3$cond,"sc"))^2),4)
ar1 <- round(attr(V3$cond$Cluster,"correlation")[1,2],4)
wiicc_ar <- round(((attr(V3$cond$ID,"stddev"))^2 + V3$cond$Cluster[1,1])/((attr(V3$cond$ID,"stddev"))^2+ V3$cond$Cluster[1,1]+(attr(V3$cond,"sc"))^2),3)
**Load your data properly in SAS;
**Exchangeable model;
**HH model;
PROC MIXED DATA=data;
CLASS Cluster Period ID Treatment;
MODEL Outcome = Period Treatment/ SOLUTION;
RANDOM int/SUBJECT=Cluster;
RANDOM int/SUBJECT=ID;
ods output covparms=HHcovparms fitstatistics=HHfitstatistics solutionf=HHsolutionf;
RUN;
**Hooper and Girling;
PROC MIXED DATA=data;
CLASS Cluster Period ID Treatment;
MODEL Outcome = Period Treatment/ SOLUTION;
RANDOM Cluster Period*Cluster;
RANDOM int/SUBJECT=ID;
ods output covparms=NEcovparms fitstatistics=NEfitstatistics solutionf=NEsolutionf;
RUN;
** Exponential decay;
** PROC HPMIXED can be used to improve the computational speed;
PROC MIXED DATA=data;
CLASS Cluster Period ID Treatment;
MODEL Outcome = Period Treatment/ SOLUTION;
RANDOM Period / SUBJECT=Cluster TYPE=AR(1);
RANDOM int/SUBJECT=ID;
RUN;
**ICC calculations based on the fitted models;
**Estimate within-period ICC for exchangeable model;
PROC SQL;
CREATE TABLE icchh AS
SELECT sum(estimate*(covparm='Cluster')) / sum(estimate) AS wpicc
FROM HHcovparms;
QUIT;
**Estimate within-, between-period ICC and cac for nested exchangeable model;
PROC SQL;
CREATE TABLE icchg AS
SELECT
sum(estimate*(covparm ^='Residual'))/sum(estimate) AS wpicc,
sum(estimate*(covparm='Cluster')) / sum(estimate) AS bpicc,
sum(estimate*(covparm='Cluster'))/ sum(estimate*(covparm ^='Residual')) AS cac
FROM Necovparms;
QUIT;
**Estimate within-period ICC and decay rate for exponential decay model;
PROC SQL;
CREATE TABLE iccar AS
SELECT
sum(estimate*(covparm='Variance')) / sum(estimate*(covparm ^='AR(1)')) AS wpicc,
sum(estimate*(covparm='AR(1)')) as ar1
FROM ARcovparms;
QUIT;
**Estimate within-period ICC for exchangeable model;
PROC SQL;
CREATE TABLE icchh AS
SELECT
sum(estimate*(subject='Cluster')) / sum(estimate) AS wpicc,
sum(estimate*(subject='Cluster') + estimate*(subject='ID')) / sum(estimate) AS wiicc
FROM HHcovparms;
QUIT;
**Estimate within-, between-period ICC and cac for nested exchangeable model;
PROC SQL;
CREATE TABLE icchg AS
SELECT
sum(estimate*(covparm='Cluster') + estimate*(covparm='Cluster*Period'))/sum(estimate) AS wpicc,
sum(estimate*(covparm='Cluster')) / sum(estimate) AS bpicc,
sum(estimate*(covparm='Cluster') + estimate*(covparm='Intercept')) / sum(estimate) AS wiicc,
sum(estimate*(covparm='Cluster'))/ sum(estimate*(covparm='Cluster') + estimate*(covparm='Cluster*Period')) AS cac
FROM Necovparms;
QUIT;
**Estimate within-period ICC and decay rate for exponential decay model;
PROC SQL;
CREATE TABLE iccar AS
SELECT
sum(estimate*(covparm='Variance')) / sum(estimate*(covparm ^='AR(1)')) AS wpicc,
sum(estimate*(covparm='Intercept') + estimate*(covparm='Variance')) / sum(estimate*(covparm ^='AR(1)')) AS wiicc,
sum(estimate*(covparm='AR(1)')) as ar1
FROM ARcovparms;
QUIT;
# Load your dataset
import delimited "replace it by the local path to your dataset", clear
# Exchangeable model
mixed outcome i.period_n i.treatment || cluster: || id:, reml
# Block exchangeable model
mixed outcome i.period_n treatment || cluster: || cluster: R.period_n || id:
# ICC calculations
. estat icc
# First properly load the dataset following our template
# Create a column for number of non-successes
data$non_suc <- data$Denominator-data$Numerator
# Create cluster ID for each individual
Cluster <- rep(data$Cluster, data$Denominator)
# Create period ID for each individual
Period <- rep(data$Period, data$Denominator)
# Create individual-level binary outcome
Outcome <- c()
for(i in 1:nrow(data)){
temp <- c(rep(1, data$Numerator[i]),rep(0, data$non_suc[i]))
Outcome <- c(Outcome, temp)
}
# Form an analytic dataset
data_analytic <- data.frame(Outcome = Outcome, Cluster = Cluster, Period = Period)
DATA data;
set data.analytic;
do i=1 to Numerator;
y=1;
output;
end;
do i= Numerator+1 to Denominator;
y=0;
output;
end;
n=1; /*for all, useful for counting # procedures per month per cluster later*/
run;
*imported dataset contains variables:
*cluster: cluster ID
*denominator: number of observations in each cluster
*events: number of events in each cluster
*This code creates individual level data for the events
clear
import excel "Path to the original dataset", sheet("Sheet1") firstrow
gen none_events=denominator-events
expandcl events, cluster( cluster) generate(outcome)
replace outcome=1
tempfile ipd_events
save `ipd_events'
*This code creates individual level data for the no events
This code creates individual level data for the events
clear
import excel ""path to dataset\data.xlsx", sheet("Sheet1") firstrow
gen none_events=denominator-events
expandcl events, cluster( cluster) generate(outcome)
replace outcome=1
tempfile ipd_events
save `ipd_events'
*This code creates individual level data for the no events
clear
import excel "path to dataset\data.xlsx", sheet("Sheet1") firstrow
gen none_events=denominator-events
expandcl none_events, cluster( cluster) generate(outcome)
replace outcome=0
tempfile ipd_noevents
save `ipd_noevents'
append using "`ipd_events'"