library(haven)
## Warning: package 'haven' was built under R version 4.5.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'tibble' was built under R version 4.5.2
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'dplyr' was built under R version 4.5.2
## Warning: package 'stringr' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(data.table)
## Warning: package 'data.table' was built under R version 4.5.2
##
## Attaching package: 'data.table'
##
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, isoyear, mday, minute, month, quarter, second, wday,
## week, yday, year
##
## The following objects are masked from 'package:dplyr':
##
## between, first, last
##
## The following object is masked from 'package:purrr':
##
## transpose
library(readstata13)
setDTthreads(0)
getDTthreads(verbose = TRUE)
## OpenMP version (_OPENMP) 201511
## omp_get_num_procs() 8
## R_DATATABLE_NUM_PROCS_PERCENT unset (default 50)
## R_DATATABLE_NUM_THREADS unset
## R_DATATABLE_THROTTLE unset (default 1024)
## omp_get_thread_limit() 2147483647
## omp_get_max_threads() 8
## OMP_THREAD_LIMIT unset
## OMP_NUM_THREADS unset
## RestoreAfterFork true
## data.table is using 8 threads with throttle==1024. See ?setDTthreads.
## [1] 8
library(SEQTaRget)
#remotes::install_github("CausalInference/SEQTaRget", subdir = "SEQTaRget")
setwd("C:/Users/agn21/OneDrive/Desktop/Msc Epidemiology/6. Causal Inference/Week 20/R")
dat <- read.dta13("msm_hiv_for_practical.dta")
## Warning in read.dta13("msm_hiv_for_practical.dta"):
## Factor codes of type double or float detected in variables
##
## age_grp, baseline_cd4_grp, baseline_rna_grp,
## group, cd4_grp, rna_grp, lag_cd4_grp,
## lag_rna_grp
##
## No labels have been assigned.
## Set option 'nonint.factors = TRUE' to assign labels anyway.
head(dat)
## id month aidsordeath haart age_grp baseline_cd4_grp baseline_rna_grp
## 1 1 1 0 0 2 5 2
## 2 1 2 0 0 2 5 2
## 3 1 3 0 0 2 5 2
## 4 1 4 0 0 2 5 2
## 5 1 5 0 0 2 5 2
## 6 1 6 0 0 2 5 2
## baseline_bevent group cd4_grp rna_grp bevent lag_cd4_grp lag_rna_grp year
## 1 0 2 5 2 0 5 2 1999
## 2 0 2 5 2 0 5 2 1999
## 3 0 2 5 2 0 5 2 1999
## 4 0 2 5 2 0 5 2 1999
## 5 0 2 5 2 0 5 2 1999
## 6 0 2 5 2 0 5 2 1999
## laghaart timespl1 timespl2 timespl3
## 1 0 0.000000 0.0000000 0.0000000
## 2 0 -0.967033 -0.7362638 -0.2967033
## 3 0 -7.736264 -5.8901100 -2.3736265
## 4 0 -26.109890 -19.8791218 -8.0109892
## 5 0 -60.890110 -47.1208801 -18.9890118
## 6 0 -112.879120 -92.0329666 -37.0879135
dat <- dat |> arrange(id, month)
dat <- dat |>
transmute(
id,
month_start = month - 1, # edited to start at month 0 as needed for SEQTaRget package
month_end = month,
aidsordeath_int = as.integer(aidsordeath),
haart_int = as.integer(haart),
laghaart_int = as.integer(laghaart),
age_grp_fct = as_factor(age_grp),
group_fct = as_factor(group),
baseline_cd4_grp_fct = as_factor(baseline_cd4_grp),
baseline_rna_grp_fct = as_factor(baseline_rna_grp),
baseline_bevent_int = as.integer(baseline_bevent),
cd4_grp_fct = as_factor(cd4_grp),
rna_grp_fct = as_factor(rna_grp),
lag_cd4_grp_fct = as_factor(lag_cd4_grp),
lag_rna_grp_fct = as_factor(lag_rna_grp),
bevent_int = as.integer(bevent),
year_fct = as_factor(year),
# spline variables
timespl1, timespl2, timespl3,
lag_aidsordeath_int = as.integer(lag(aidsordeath_int)),
# eligibility variable
eligible = case_when(
haart_int == 0 & lag_aidsordeath_int == 0 ~ 1,
haart_int == 1 & laghaart_int == 0 & lag_aidsordeath_int == 0 ~ 1,
TRUE ~ 0
)
)
The dataset has the following variables:
# Explore the dataset
# Look at the data for an example patient
example_id <- 91
dat |>
filter(id == example_id) |>
select(c(id, month_start, month_end, aidsordeath_int, haart_int, rna_grp_fct))
## id month_start month_end aidsordeath_int haart_int rna_grp_fct
## 1 91 0 1 0 0 3
## 2 91 1 2 0 0 3
## 3 91 2 3 0 0 3
## 4 91 3 4 0 0 3
## 5 91 4 5 0 0 4
## 6 91 5 6 0 1 4
## 7 91 6 7 0 1 4
## 8 91 7 8 1 1 4
# Create the sequential trials and estimate a hazard ratio for the per protocol effect of ART using pooled logistic regression, accounting for the informative censoring
# define the options used in the SEQuential function
opts_haz_ipcw <- SEQopts(
hazard = TRUE,
weighted = TRUE,
weight.preexpansion = TRUE,
weight.eligible_cols = list("eligible"), # exclude already-treated rows
bootstrap = TRUE,
bootstrap.nboot = 20,
seed = 12345,
data.return = TRUE,
selection.random = TRUE,
selection.prob = 0.5
)
# Estimate the hazard ratio using informative censoring weights
model_haz_ipcw <- SEQuential(
data = dat,
id.col = "id",
time.col = "month_start",
eligible.col = "eligible",
treatment.col = "haart_int",
outcome.col = "aidsordeath_int",
time_varying.cols =
c("cd4_grp_fct", "rna_grp_fct", "lag_cd4_grp_fct",
"lag_rna_grp_fct", "bevent_int", "year_fct"),
fixed.cols =
c("baseline_cd4_grp_fct", "baseline_rna_grp_fct",
"baseline_bevent_int", "age_grp_fct", "group_fct"),
method = "censoring",
options = opts_haz_ipcw)
## Non-required columns provided, pruning for efficiency
## Pruned
## Expanding Data...
## Expansion Successful
## Moving forward with censoring analysis
## Bootstrapping with 80 % of data 20 times
## Completed
model_haz_ipcw@DT |>
filter(id == example_id) |>
arrange(id, trial, period) |>
select(id, trial, period, followup, haart_int, haart_int_bas, aidsordeath_int, weight) |>
print()
## id trial period followup haart_int haart_int_bas aidsordeath_int
## <int> <num> <int> <int> <fctr> <fctr> <int>
## 1: 91 0 0 0 0 0 0
## 2: 91 0 1 1 0 0 0
## 3: 91 0 2 2 0 0 0
## 4: 91 0 3 3 0 0 0
## 5: 91 0 4 4 0 0 0
## 6: 91 0 5 5 1 0 NA
## 7: 91 1 1 0 0 0 0
## 8: 91 1 2 1 0 0 0
## 9: 91 1 3 2 0 0 0
## 10: 91 1 4 3 0 0 0
## 11: 91 1 5 4 1 0 NA
## 12: 91 2 2 0 0 0 0
## 13: 91 2 3 1 0 0 0
## 14: 91 2 4 2 0 0 0
## 15: 91 2 5 3 1 0 NA
## 16: 91 3 3 0 0 0 0
## 17: 91 3 4 1 0 0 0
## 18: 91 3 5 2 1 0 NA
## 19: 91 4 4 0 0 0 0
## 20: 91 4 5 1 1 0 NA
## 21: 91 5 5 0 1 1 0
## 22: 91 5 6 1 1 1 0
## 23: 91 5 7 2 1 1 1
## id trial period followup haart_int haart_int_bas aidsordeath_int
## <int> <num> <int> <int> <fctr> <fctr> <int>
## weight
## <num>
## 1: 1.0000000
## 2: 1.0782659
## 3: 1.1271890
## 4: 1.1758135
## 5: 1.3535174
## 6: 0.4097814
## 7: 1.0782659
## 8: 1.1271890
## 9: 1.1758135
## 10: 1.3535174
## 11: 0.4097814
## 12: 1.0453720
## 13: 1.0904671
## 14: 1.2552724
## 15: 0.3800374
## 16: 1.0431378
## 17: 1.2007901
## 18: 0.3635427
## 19: 1.1511327
## 20: 0.3485088
## 21: 0.3027529
## 22: 0.3027529
## 23: 0.3027529
## weight
## <num>
# what covariates were used in the censoring models (numerator and denominator) and the outcome model?
covariates(model_haz_ipcw)
## $Outcome
## [1] "aidsordeath_int ~ haart_int_bas + followup + followup_sq + trial + trial_sq + baseline_cd4_grp_fct + baseline_rna_grp_fct + baseline_bevent_int + age_grp_fct + group_fct"
##
## $Numerator
## [1] "haart_int ~ baseline_cd4_grp_fct + baseline_rna_grp_fct + baseline_bevent_int + age_grp_fct + group_fct + month_start + month_start_sq"
##
## $Denominator
## [1] "haart_int ~ baseline_cd4_grp_fct + baseline_rna_grp_fct + baseline_bevent_int + age_grp_fct + group_fct + cd4_grp_fct + rna_grp_fct + lag_cd4_grp_fct + lag_rna_grp_fct + bevent_int + year_fct + month_start + month_start_sq"
# define the options used in the SEQuential function
opts_haz_unweighted <- SEQopts(
hazard = TRUE,
weighted = FALSE,
bootstrap = TRUE,
bootstrap.nboot = 20,
seed = 1234,
selection.random = TRUE,
selection.prob = 0.5
)
# Estimate the hazard ratio using informative censoring weights
model_haz_unweighted <- SEQuential(
data = dat,
id.col = "id",
time.col = "month_start",
eligible.col = "eligible",
treatment.col = "haart_int",
outcome.col = "aidsordeath_int",
time_varying.cols =
c("cd4_grp_fct", "rna_grp_fct", "lag_cd4_grp_fct",
"lag_rna_grp_fct", "bevent_int", "year_fct"),
fixed.cols =
c("baseline_cd4_grp_fct", "baseline_rna_grp_fct",
"baseline_bevent_int", "age_grp_fct", "group_fct"),
method = "censoring",
options = opts_haz_unweighted)
## Non-required columns provided, pruning for efficiency
## Pruned
## Expanding Data...
## Expansion Successful
## Moving forward with censoring analysis
## Bootstrapping with 80 % of data 20 times
## Completed
opts_surv_ipcw <- SEQopts(
km.curves = TRUE,
weighted = TRUE,
weight.preexpansion = TRUE,
weight.eligible_cols = list("eligible"), # exclude already-treated rows
bootstrap = TRUE,
bootstrap.nboot = 20,
seed = 1234,
selection.random = TRUE,
selection.prob = 0.5
)
model_surv_ipcw <- SEQuential(
data = dat,
id.col = "id",
time.col = "month_start",
eligible.col = "eligible",
treatment.col = "haart_int",
outcome.col = "aidsordeath_int",
time_varying.cols =
c("cd4_grp_fct", "rna_grp_fct", "lag_cd4_grp_fct",
"lag_rna_grp_fct", "bevent_int", "year_fct"),
fixed.cols =
c("baseline_cd4_grp_fct", "baseline_rna_grp_fct",
"baseline_bevent_int", "age_grp_fct", "group_fct"),
method = "censoring",
options = opts_surv_ipcw)
## Non-required columns provided, pruning for efficiency
## Pruned
## Expanding Data...
## Expansion Successful
## Moving forward with censoring analysis
## Bootstrapping with 80 % of data 20 times
## censoring model created successfully
## Creating Survival curves
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Completed
km_curve(model_surv_ipcw,
plot.type = "risk",
plot.labels = c("Untreated (no initiation)", "Treated (ART initiation)"),
plot.title = "",
plot.subtitle = "Cumulative incidence for the outcome AIDs or death"
)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## [[1]]
# risk at the end of follow up
risk_data(model_surv_ipcw)
## [[1]]
## Method A Risk 95% LCI 95% UCI SE
## <char> <char> <num> <num> <num> <num>
## 1: censoring 0 0.40533007 0.166774615 0.64388552 0.12171420
## 2: censoring 1 0.03322166 0.005306122 0.06113719 0.01424288
# risk difference and risk ratio at the end of follow-up
risk_comparison(model_surv_ipcw)
## [[1]]
## A_x A_y Risk Ratio RR 95% LCI RR 95% UCI Risk Differerence RD 95% LCI
## <fctr> <fctr> <num> <num> <num> <num> <num>
## 1: risk_0 risk_1 0.08196198 0.02938138 0.2286403 -0.3721084 -0.6122916
## 2: risk_1 risk_0 12.20077854 4.37368304 34.0351588 0.3721084 0.1319252
## RD 95% UCI
## <num>
## 1: -0.1319252
## 2: 0.6122916