DS1103-074 study is a First-in-human study for dose escalation of
DS1103a combination with T-DXd in subjects with advanced tumors.
For breast cancer, patients with HER2-expressing (e.g., IHC 1+ or
greater) or HER2-mutated (activating mutation) solid tumors are enrolled
in the study. DB-04 was the first phase 3 study to expand the T-DXd from
her2 positive to her2 low BC in late line setting (relative to chemo).
While DS1103-074 dose escalation data shows promising response rate
especially in CD47 positive population which fuels the further
development of the combination, the question of interest remains “does
the combo perform better than the monotherapy”. While general in/ex
criteria are similar between studies, the specific characteristics
associated with BC such as HER2/HR status, prior treatments
(chemo/endocrine/CDK4/6 etc.) and known baseline prognostic factors such
as ECOG, type of baseline metastasis etc can be different between the
two studies. While comparing the treatment effect, there are a few
things to consider: 1) baseline covariates imbalance, solution is
created matched/re-weighted datasets based on propensity score
(probability of receiving combo given the covariate values) 2) in what
population we want to estimate the treatment effect of combo vs mono,
current study, historical study, or both? The results from these
analyses may facilitate the decision of how to move the compound
forward.
#system("rm -rf /home/xjia/R/x86_64-pc-linux-gnu-library/4.4/00LOCK-MatchIt")
#list.files("/home/xjia/R/x86_64-pc-linux-gnu-library/4.4", pattern = "00LOCK", full.names = TRUE)
#remotes::install_github("kosukeimai/MatchIt")
.libPaths(c("/home/xjia/R/x86_64-pc-linux-gnu-library/4.4",
"/usr/local/lib/R/site-library",
"/usr/local/lib/R/library"))
rm(list=ls())
library(data.table)
packageVersion("data.table")
## [1] '1.17.0'
library(marginaleffects)
library(haven)
library(MatchIt)
library(tidyverse)
library(readxl)
library(writexl)
library(cobalt)
library(survival)
library(WeightIt)
This dataset contains N=354 BC pts from T-DXd arm in DB-04, including efficacy outcomes (ORR) and the baseline covariates mentioned in the pre-specified subgroup analyses section in the SAP. her2 status derived based on both IHC and ISH results, hormone receptor status (estrogen and progestrogen), number of prior line of chemo therapy in mets setting, prior CDK4/6 (depends on HR status), age,race,region, number of prior endocrine therapy in mets setting, best response to last prior cancer systemic therapy, CNS mets, renal function, hepatic function,baseline visceral disease, ECOG PS.
adsl2<-read_sas("/cdrdata/DS8201-A-U303/restricted/rstrct_data/rstrct_adam/20220328_csr/final_20220330/adsl.sas7bdat")
adrs2<-read_sas ("/cdrdata/DS8201-A-U303/restricted/rstrct_data/rstrct_adam/20220328_csr/final_20220330/adrs.sas7bdat")
adtte2<-read_sas ("/cdrdata/DS8201-A-U303/restricted/rstrct_data/rstrct_adam/20220328_csr/final_20220330/adtte.sas7bdat")
sub_sl2 <- adsl2%>%
select(SUBJID,USUBJID,TRT01A,HER2IC1,HER2IH1,ESTRCPT,ESTRCPTN,PROGESTR,PRGESTRN,HORMONR,HORMONRN,BESTRESP, ECOGBLN,
AGEGR2N,RACEN,REGION2,NCHMST,NREGEN,CNSYN,VISBL,RENALIP,HEPAIP,PRIORCDK)%>%
filter(TRT01A=="T-DXd"& (HER2IC1=="0" |HER2IC1=="1+"|(HER2IC1=="2+" & HER2IH1=="Negative") ))%>%
mutate(her2=ifelse(HER2IC1=="0",0,1),HORMONRN=ifelse(HORMONRN==1,1,0))%>%filter(her2==1)
count2<-sub_sl2%>%count(HORMONRN)
count3<-sub_sl2%>%count(VISBL)
count4<-sub_sl2%>%count(PRIORCDK) # this variable has 37 "", 93 "No" and 224 "Yes"
count5<-sub_sl2%>%count(NCHMST)
count6<-sub_sl2%>%count(NREGEN)
# retrieve ORR and PFS data from adrs and adtte
adrs21<-adrs2%>%select(USUBJID,PARAMCD,PARQUAL,AVALC,ADY)%>%filter(PARAMCD=='CBRSP' & PARQUAL=="INVESTIGATOR")%>%mutate(ORR=ifelse(AVALC %in% c("CR","PR"),1,0))%>%select(USUBJID,ORR)
adtte21<-adtte2%>%select(USUBJID,PARAMCD,PARQUAL,AVAL,CNSR)%>%filter(PARAMCD=='PFS1' & PARQUAL=="INVESTIGATOR")%>%mutate(PFSM=AVAL/30.25,CENSOR=1-CNSR)%>%select(USUBJID,PFSM,CENSOR)
db<-left_join(sub_sl2,adrs21,by='USUBJID')%>%left_join(adtte21,by="USUBJID")%>%mutate(TRTA=TRT01A,REGION=REGION2)%>%
select(USUBJID,TRTA,her2,HORMONRN,BESTRESP,ECOGBLN,AGEGR2N,RACEN,REGION,NCHMST,NREGEN,CNSYN,VISBL,RENALIP,HEPAIP,PRIORCDK,ORR,PFSM,CENSOR)
adsl1 <-read_sas("/cdrdata/DS1103-074/dev/transfer/labcorp/adam/20250930_bb_adhoc/final/adsl.sas7bdat")
adrs1 <-read_sas("/cdrdata/DS1103-074/dev/transfer/labcorp/adam/20250930_bb_adhoc/final/adrs.sas7bdat")
adtte1 <-read_sas("/cdrdata/DS1103-074/dev/transfer/labcorp/adam/20250930_bb_adhoc/final/adtte.sas7bdat")
cm1 <-read_sas("/cdrdata/DS1103-074/dev/transfer/labcorp/sdtm/20250930_bb_adhoc/final/cm.sas7bdat")
cmsupp1 <-read_sas ("/cdrdata/DS1103-074/dev/transfer/labcorp/sdtm/20250930_bb_adhoc/final/suppcm.sas7bdat")
lb1 <-read_sas ("/cdrdata/DS1103-074/dev/transfer/labcorp/sdtm/20250930_bb_adhoc/final/lb.sas7bdat")
mh1 <-read_sas ("/cdrdata/DS1103-074/dev/transfer/labcorp/sdtm/20250930_bb_adhoc/final/mh.sas7bdat")
mi1 <-read_sas ("/cdrdata/DS1103-074/dev/transfer/labcorp/sdtm/20250930_bb_adhoc/final/mi.sas7bdat")
ae1 <-read_sas ("/cdrdata/DS1103-074/dev/transfer/labcorp/sdtm/20250930_bb_adhoc/final/ae.sas7bdat")
tu1 <-read_sas ("/cdrdata/DS1103-074/dev/transfer/labcorp/sdtm/20250930_bb_adhoc/final/tu.sas7bdat")
supptu1 <-read_sas ("/cdrdata/DS1103-074/dev/transfer/labcorp/sdtm/20250930_bb_adhoc/final/supptu.sas7bdat")
cmclass <-read_excel ("/cdrdata/DS1103-074/dev/Statistics/indirect_comp/DS1103_074_Prior_Cancer_Systemic_Therapy_09302025_JA.xlsx",na="")
#Subset of adsl1
sub_sl1 <- adsl1%>%
mutate(flag=1)%>%
select(SUBJID,USUBJID,flag,FASFL,COHORT,HER2IHC, HER2ISH,ESTRCPTN,PRGESTRN,ECOGBLN,BRESPPS, AGEGR2N, RACE, RACEN,REGION1)%>%
filter(FASFL=="Y" )
#& (HER2IHC=="0"| HER2IHC=="1+"|(HER2IHC=="2+" & HER2ISH=="Negative")
# get MI data and retrieve HER2 and HR status from MI data
mi11<-mi1%>%select(USUBJID,MISEQ,MITESTCD,MITEST,MICAT,MISTRESC)%>%
filter(MICAT %in% c("PRIOR BIOMARKER HISTORY","PRIOR BREAST CANCER HISTORY") & (MITESTCD %in% c("HER2","HER2GAM","ESTRCPT","PROGESTR")))%>%
select(USUBJID,MITESTCD,MISTRESC)
mi_wide<-mi11 %>%
pivot_wider(
names_from = MITESTCD,
values_from = MISTRESC
)%>%mutate(HR=ifelse(ESTRCPT=="Positive" |PROGESTR=="Positive",1,0))
sl11<-sub_sl1%>%left_join(mi_wide,by="USUBJID")%>%
filter(HER2=="0"|HER2=="1+"|(HER2=="2+" & HER2GAM=="Negative"))
#%>%filter(HER2IHC !=HER2) # subject 33020002 has HER2 in MI data but empty in HER2IHC in ADSL
mh11<-mh1%>%select(USUBJID,MHTERM)%>%filter(MHTERM=="BREAST CANCER")
base1<-inner_join(sub_sl1, mh11,by="USUBJID")
#base1 <-inner_join(sl11,mh11,by="USUBJID")
base2<-base1%>%mutate(hr=ifelse((ESTRCPTN==1 | PRGESTRN==1),1,0), her2=ifelse(HER2IHC=="0",0,1))
#base11<-base1%>%select(USUBJID,HER2,HER2GAM,ESTRCPT,PROGESTR)
#base2 <- base1 %>%
# mutate(
# HORMONRN = HR,
# her2_low = if_else(
# HER2 %in% c("1+") | (HER2 == "2+" & HER2GAM == "Negative"),
# 1, 0)
#) %>%
#filter(her2_low ==1)
# retrieve number of prior lines of chemotherapy (1, >=2) in the metastatic setting; This is not accurate. Need clinical input on which lines are chemo
sub_cm1<-cm1%>%
select(USUBJID,CMGRPID,CMDECOD,CMINDC,CMCAT)%>%
filter(CMCAT=="PRIOR CANCER SYSTEMIC THERAPY" & (CMINDC=="METASTATIC" |CMINDC=="LOCALLY ADVANCED"))
cmclass1<-cmclass%>%
distinct(CMDECOD, .keep_all = TRUE)
cm2<-left_join(sub_cm1,cmclass1,by="CMDECOD")
# count number of chemo, CDK4/6, endocrine per subject
cm_counts <- cm2 %>%
group_by(USUBJID) %>%
summarize(
chemo = sum(CHEMOFL == "Y", na.rm = TRUE),
cdk = sum(CDK6FL =="Y",na.rm=TRUE),
endo =sum(ENDOCRFL =="Y",na.rm=TRUE)
)
base3<-left_join(base2, cm_counts,by="USUBJID")
# Reported history of CNS metastases (yes, no)
mh12<-mh1%>%select(USUBJID,MHSPID,MHTERM)%>%filter(MHTERM=="BRAIN METASTASIS")%>%
group_by(USUBJID)%>%
arrange(desc(MHSPID),.by_group=TRUE)%>%
slice(1)%>%
ungroup()%>%mutate(brain=MHTERM)%>%select(USUBJID,brain)
base4<-left_join(base3,mh12,by="USUBJID")
## renal function at baseline determined by the baseline creatinine clearance:
lb11<-lb1%>%select(USUBJID,LBTESTCD,LBORRES,LBBLFL)%>%filter(LBTESTCD=="CREATCLR" & LBBLFL=="Y")%>%mutate(val=as.numeric(LBORRES))%>%
mutate(renal_level=cut(val,breaks=c(-Inf,15,30,60,90,Inf),labels=c("End","Severe","Moderate","Mild","Normal"),include.lowest=TRUE))%>%select(USUBJID,renal_level)
base5<-left_join(base4,lb11,by="USUBJID")
##hepatic function at baseline determined by the baseline total bilirubin, AST, Gilbert syndrome (AE data)
lb12<-lb1%>%select(USUBJID,LBTESTCD,LBORRES,LBORNRHI,LBBLFL,LBDY)%>%filter(LBTESTCD=="AST" & LBBLFL=="Y")%>%
mutate(AST=LBORRES,ASTULN=as.numeric(LBORNRHI),ASTDY=LBDY)%>%select(USUBJID,AST,ASTULN,ASTDY)
lb13<-lb1%>%select(USUBJID,LBTESTCD,LBCAT,LBORRES,LBORNRHI, LBBLFL,LBDY)%>%filter(LBTESTCD=="BILI" &LBBLFL=="Y" &LBCAT=='CHEMISTRY')%>%
mutate(BILI=LBORRES,BILIULN=as.numeric(LBORNRHI),BILIDY=LBDY)%>%select(USUBJID,BILI,BILIULN,BILIDY)
gil<-ae1%>%select(USUBJID,AEPTCD,AEDECOD,AESTDY,AEENDY)%>%filter(AEPTCD==10018267)
hepa<-inner_join(lb12,lb13,gil,by="USUBJID")%>%mutate(hepa_level=case_when(
BILI<=BILIULN & AST<=ASTULN ~ "Normal",
(BILI>BILIULN & BILI<=(1.5*BILIULN)) | (BILI<=BILIULN & AST>ASTULN) ~ "Mild",
BILI>(1.5*BILIULN) & BILI<=(3*BILIULN) ~"Moderate",
BILI> (3*BILIULN) ~ "Severe",
TRUE ~"Other/NA"
)
)%>%select(USUBJID,hepa_level)
base6<-left_join(base5,hepa,by="USUBJID")
## baseline visceral disease (yes, no) determined by any target or non-target tumor except "Breast","Skin","Slymph Node", and "Bone" in the location on
# the "Target/Non-Target Tumor Assessments(imaging baseline)" CRF page (See appendix 1.1 in DB-04 SAP)
tu11<-tu1%>%select(USUBJID,TUSEQ,TUSTRESC,TUBLFL,TULOC,TUDY)%>%filter(TUBLFL=="Y")
supptu11<-supptu1%>%select(USUBJID,IDVAR,IDVARVAL,QNAM,QVAL)%>%filter(QNAM=="TULOCOTH")%>%mutate(TUSEQ=as.numeric(IDVARVAL))%>%select(USUBJID,TUSEQ,QVAL)
tu12<-left_join(tu11,supptu11,by=c("USUBJID","TUSEQ"))%>%mutate(
visceral=ifelse(TULOC %in% c("Back","Bone","Breast","Cervical Vertebra","Chest wall","Extremity","Head","Head (excluding Oral Cavity)","Larynx","Lumbar Vertebra","Lymph Nodes",
"Muscle","Nasopharynx","Neck","Nose","Orophyarynx","Other","Pelvic Bone","Penis","Rib","Skin","Skull","Soft Tissue","Subcutaneous","Thoracic Vertebra"),0,1)
)%>%select(USUBJID,visceral)
#cmclass <-read_excel ("/cdrdata/DS1103-074/dev/Statistics/indirect_comp/DS1103_074_Prior_Cancer_Systemic_Therapy_09302025_JA.xlsx",na="")
#write_xlsx(tu12, "/cdrdata/DS1103-074/dev/Statistics/indirect_comp/visceral.xlsx")
base7<-left_join(base6,tu12,by="USUBJID")
# retrieve ORR and PFS data from adrs and adtte
adrs11<-adrs1%>%select(USUBJID,PARAMCD,PARQUAL,AVALC,ADY)%>%filter(PARAMCD=='CBRSP' & PARQUAL=="INVESTIGATOR")%>%mutate(ORR=ifelse(AVALC %in% c("CR","PR"),1,0))%>%select(USUBJID,ORR)
adtte11<-adtte1%>%select(USUBJID,PARAMCD,PARQUAL,AVALM,CNSR)%>%filter(PARAMCD=='PFS1' & PARQUAL=="INVESTIGATOR")%>%mutate(PFSM=AVALM,CENSOR=1-CNSR)%>%select(USUBJID,PFSM,CENSOR)
base8<-left_join(base7,adrs11,by='USUBJID')%>%left_join(adtte11,by="USUBJID")%>%
mutate(CNSYN=ifelse(brain %in% "BRAIN METASTASIS","Y","N"),RENALIP=renal_level,HEPAIP=hepa_level,
NCHMST=chemo,NREGEN=endo,PRIORCDK=ifelse(cdk>=1,"Yes","No"),VISBL=ifelse(visceral==1,"Y","N"),HORMONRN=hr,TRT01A="Combo",BESTRESP=BRESPPS,TRTA=TRT01A,REGION=REGION1)%>%
select(USUBJID,TRTA,her2,HORMONRN,ECOGBLN,AGEGR2N,REGION,RACEN,CNSYN,NCHMST,NREGEN,PRIORCDK,VISBL,RENALIP,HEPAIP,BESTRESP,ORR,PFSM,CENSOR)
all<-bind_rows(base8,db)%>%mutate(trt=ifelse(TRTA=="T-DXd",0,1))%>%mutate(presp=ifelse(BESTRESP %in% c("CR","PR"),1,0),
white=ifelse(RACEN==1,1,0),renal=ifelse(RENALIP %in% c("Normal"),1,0),hepa=ifelse(HEPAIP %in% c("Normal"),1,0),endo=ifelse(NREGEN>=3,3,NREGEN), pchemo=ifelse(NCHMST>=2,2,NCHMST),priorcdk=ifelse(PRIORCDK=="Yes","Yes","Other"),
region=ifelse(REGION =="North America", 1,0) )
#endo=ifelse(endo>=3,3,endo), pchemo=ifelse(NCHMST>=2,2,NCHMST),priorcdk=ifelse(PRIORCDK=="Yes","Yes","Other") # use original continuous scale for prior chemo and prior endo
The data from DS1103 and DB-04 will be exactly matched on her2 low status and hormone receptor positive/negative status. Due to the sample size constraint espeically from DS1103-074 study, DS1103 clinical team prioritized four most important variables to be considered among the rest variables: number of prior line of chemo in metastatic setting, number of prior lines of endocrine therapy, baseline visceral disease Yes vs No, and prior CDK4/6 Yes or No.
We will first display the distributions of these variables between her2 low patient population from DS1103 and DB-04 studies, and collaps in case sparse, and then construct propensity score models using both exact matched variables (HR status), and the rest prioritized variables using one of the following methods: 1) 1:k nearest matching (k=2,3,4) 2) 1:k optimal matching (k=2,3,4) 3) IPW weighting based on estimated propensity scores
In a randomized study, the probability of each subject receiving treatment is 0.5, irrespective of covariates, due to randomization. In the indirect comparison, the probability of each subject receiving treatment conditional on covariates varies. The purpose of creating the matched datasets from control is to ensure treatment assignment is irrelevant with the propensity scores among the matched pairs. The assumption of this method is that there are no unmeasured confounders, which is not valid in this analysis, because due to sample size constraint we only included a few prioritzed variabels in the model and left out most of the covariates. Another assumption is the included covariates have linear association with the logit of the probability of receviing treatment.
#remove missing baseline visceral mets in DB-04
table(all$HORMONRN, all$trt, useNA = "ifany")
##
## 0 1
## 0 36 2
## 1 318 16
table(all$AGEGR2N,all$trt,useNA="ifany")
##
## 0 1
## 0 276 14
## 1 78 4
table(all$region,all$trt,useNA="ifany") # collapse "europe" and "europe + israel"
##
## 0 1
## 0 303 10
## 1 51 8
table(all$white, all$trt, useNA = "ifany")## need to collapse Race into white vs non-white
##
## 0 1
## 0 190 5
## 1 164 13
table(all$CNSYN, all$trt, useNA = "ifany") # this variable has perfect separation, and needs to be dropped
##
## 0 1
## N 319 0
## Y 35 18
table(all$pchemo, all$trt, useNA = "ifany") # this variable collapse into 0,1,>=2
##
## 0 1
## 0 1 2
## 1 208 6
## 2 145 10
table(all$endo, all$trt, useNA ="ifany") # this variable collapse into 0,1,2,>=3
##
## 0 1
## 0 24 4
## 1 71 5
## 2 113 5
## 3 146 4
table(all$priorcdk,all$trt,useNA="ifany") # DB-04 has 37 records empty, treated as No?
##
## 0 1
## Other 130 4
## Yes 224 14
table(all$VISBL,all$trt, useNA="ifany")
##
## 0 1
## N 39 3
## Y 315 15
table(all$renal, all$trt, useNA = "ifany")# collapse normal vs others
##
## 0 1
## 0 161 6
## 1 193 12
table(all$hepa, all$trt, useNA = "ifany") # collapse normal vs others
##
## 0 1
## 0 193 7
## 1 161 11
table(all$presp, all$trt, useNA = "ifany")
##
## 0 1
## 0 307 14
## 1 47 4
table(all$ECOGBLN, all$trt, useNA = "ifany")
##
## 0 1
## 0 192 7
## 1 162 11
table(all$NCHMST, all$trt, useNA = "ifany")
##
## 0 1
## 0 1 2
## 1 208 6
## 2 139 6
## 3 6 3
## 4 0 1
table(all$ORR, all$trt,useNA="ifany")
##
## 0 1
## 0 173 9
## 1 181 9
ps_formula <- trt ~ HORMONRN+ NCHMST + endo + VISBL + priorcdk
## check baseline balance measure
bal_un <- cobalt::bal.tab(ps_formula, data = all, estimand = "ATT", binary = "std")
bal_un
## Balance Measures
## Type Diff.Un
## HORMONRN Binary -0.0300
## NCHMST Contin. 0.2779
## endo Contin. -0.5248
## VISBL_Y Binary -0.1516
## priorcdk_Yes Binary 0.3488
##
## Sample sizes
## Control Treated
## All 354 18
love.plot(bal_un, abs = TRUE, thresholds = c(m = 0.1)) +
ggplot2::ggtitle("Unadjusted balance (SMD)")
## estimate ATT weights
W_att <- weightit(
formula = ps_formula,
data = all,
method = "glm",
estimand = "ATT"
)
W_att
## A weightit object
## - method: "glm" (propensity score weighting with GLM)
## - number of obs.: 372
## - sampling weights: none
## - treatment: 2-category
## - estimand: ATT (focal: 1)
## - covariates: HORMONRN, NCHMST, endo, VISBL, priorcdk
summary(W_att)
## Summary of weights
##
## - Weight ranges:
##
## Min Max
## treated 1.0000 || 1.0000
## control 0.0021 |---------------| 0.5702
##
## - Units with the 5 most extreme weights by group:
##
## 5 4 3 2 1
## treated 1 1 1 1 1
## 186 90 21 352 133
## control 0.3172 0.3172 0.3172 0.3963 0.5702
##
## - Weight statistics:
##
## Coef of Var MAD Entropy # Zeros
## treated 0.000 0.000 0.000 0
## control 1.312 0.796 0.533 0
##
## - Effective Sample Sizes:
##
## Control Treated
## Unweighted 354. 18
## Weighted 130.36 18
bal_w <- cobalt::bal.tab(W_att, un = TRUE, binary = "std")
bal_w
## Balance Measures
## Type Diff.Un Diff.Adj
## prop.score Distance 0.6234 0.2323
## HORMONRN Binary -0.0300 -0.0153
## NCHMST Contin. 0.2779 0.1171
## endo Contin. -0.5248 -0.0351
## VISBL_Y Binary -0.1516 -0.0425
## priorcdk_Yes Binary 0.3488 -0.0342
##
## Effective sample sizes
## Control Treated
## Unadjusted 354. 18
## Adjusted 130.36 18
love.plot(bal_w, abs = TRUE, thresholds = c(m = 0.1)) +
ggplot2::ggtitle("Balance after ATT weighting (SMD)")
# Estimate propensity scores from the same PS model used for weighting
ps_fit <- glm(ps_formula, data = all, family = binomial())
all$ps <- predict(ps_fit, type = "response")
# Attach ATT weights (treated ~1; controls reweighted)
all$w_att <- W_att$weights
# Unweighted PS overlap
p_unw <- ggplot(all, aes(x = ps, fill = factor(trt))) +
geom_density(alpha =1,adjust=5,linewidth=1) +
coord_cartesian(xlim = c(0, 1)) +
labs(
title = "Propensity score overlap (unweighted)",
x = "Estimated propensity score",
fill = "trt (1=treated)"
)
# Weighted PS overlap (ATT weights)
p_w <- ggplot(all, aes(x = ps, weight = w_att, fill = factor(trt))) +
geom_density(alpha = 1,adjust=5,linewidth=1) +
coord_cartesian(xlim = c(0, 1)) +
labs(
title = "Propensity score overlap after ATT weighting",
x = "Estimated propensity score",
fill = "trt (1=treated)"
)
p_unw
p_w
# Optional numeric summaries: treated PS range and fraction of controls inside it
tmin <- min(all$ps[all$trt == 1])
tmax <- max(all$ps[all$trt == 1])
c(
treated_ps_min = tmin,
treated_ps_max = tmax,
prop_controls_within_treated_range =
mean(all$ps[all$trt == 0] >= tmin & all$ps[all$trt == 0] <= tmax)
)
## treated_ps_min treated_ps_max
## 0.005702741 0.653808357
## prop_controls_within_treated_range
## 0.920903955
fit_orr_w <- glm_weightit(
ORR ~ trt * (NCHMST + endo + VISBL + priorcdk),
data = all,
family = binomial(),
weightit = W_att,
vcov = "asympt" # if not available, glm_weightit defaults to HC0
)
eff_orr_w <- avg_comparisons(
fit_orr_w,
variables = "trt",
newdata = subset(all,trt == 1),
type = "response", # probability scale => risk difference
vcov = TRUE # use the vcov stored in fit_y
)
eff_orr_w
##
## Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
## -0.012 0.0996 -0.12 0.905 0.1 -0.207 0.183
##
## Term: trt
## Type: response
## Comparison: 1 - 0
m.out <- matchit(
trt ~ NCHMST+endo+VISBL+priorcdk+HORMONRN,
data = all,
method = "nearest", # nearest neighbor matching
distance = "glm", # logistic PS
link="logit",
exact = ~ HORMONRN, # 🔑 enforce exact matching
ratio = 1, # e.g., 1:3 matching
#caliper = 0.2 # optional (on logit scale)
)
#obtain the matched dataset
matched_data <- match.data(m.out)
#check covariate balance
summary(m.out)
##
## Call:
## matchit(formula = trt ~ NCHMST + endo + VISBL + priorcdk + HORMONRN,
## data = all, method = "nearest", distance = "glm", link = "logit",
## exact = ~HORMONRN, ratio = 1)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.1443 0.0435 0.6234 10.9978 0.2330
## NCHMST 1.7222 1.4237 0.2779 4.0543 0.1030
## endo 1.5000 2.0763 -0.5248 1.3690 0.1441
## VISBLN 0.1667 0.1102 0.1516 . 0.0565
## VISBLY 0.8333 0.8898 -0.1516 . 0.0565
## priorcdkOther 0.2222 0.3672 -0.3488 . 0.1450
## priorcdkYes 0.7778 0.6328 0.3488 . 0.1450
## HORMONRN 0.8889 0.8983 -0.0300 . 0.0094
## eCDF Max
## distance 0.4492
## NCHMST 0.2053
## endo 0.2316
## VISBLN 0.0565
## VISBLY 0.0565
## priorcdkOther 0.1450
## priorcdkYes 0.1450
## HORMONRN 0.0094
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.1443 0.1172 0.1673 2.5395 0.0145
## NCHMST 1.7222 1.5556 0.1552 3.0431 0.0778
## endo 1.5000 1.5000 0.0000 1.1081 0.0278
## VISBLN 0.1667 0.2222 -0.1491 . 0.0556
## VISBLY 0.8333 0.7778 0.1491 . 0.0556
## priorcdkOther 0.2222 0.2222 0.0000 . 0.0000
## priorcdkYes 0.7778 0.7778 0.0000 . 0.0000
## HORMONRN 0.8889 0.8889 0.0000 . 0.0000
## eCDF Max Std. Pair Dist.
## distance 0.1111 0.1707
## NCHMST 0.1667 0.4655
## endo 0.0556 0.4444
## VISBLN 0.0556 0.7454
## VISBLY 0.0556 0.7454
## priorcdkOther 0.0000 0.2222
## priorcdkYes 0.0000 0.2222
## HORMONRN 0.0000 0.0000
##
## Sample Sizes:
## Control Treated
## All 354 18
## Matched 18 18
## Unmatched 336 0
## Discarded 0 0
b <- bal.tab(
m.out,
un = TRUE,
binary = "std", # force standardized differences for binary vars
continuous = "std" # explicit, though this is already the default
)
love.plot(
b,
stats = "mean.diffs",
abs = TRUE,
threshold = 0.1,
title="1:1 nearest matching"
)
## using matched set to estimate trt effect using g computation
fit_orr_m <- glm(
ORR ~ trt * (NCHMST+endo+VISBL+priorcdk+HORMONRN),
data = matched_data,
family = binomial(),
weights = matched_data$weights
)
eff_orr_m <- avg_comparisons(
fit_orr_m,
variables = "trt",
newdata = subset(trt == 1),
type = "response",
vcov = ~subclass
)
eff_orr_m
##
## Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
## -0.0144 0.121 -0.119 0.905 0.1 -0.251 0.222
##
## Term: trt
## Type: response
## Comparison: 1 - 0
m.out <- matchit(
trt ~ NCHMST+endo+VISBL+priorcdk+HORMONRN,
data = all,
method = "nearest", # nearest neighbor matching
distance = "glm", # logistic PS
link="logit",
exact = ~ HORMONRN, # 🔑 enforce exact matching
ratio = 2, # e.g., 1:3 matching
#caliper = 0.2 # optional (on logit scale)
)
#obtain the matched dataset
matched_data <- match.data(m.out)
#check covariate balance
summary(m.out)
##
## Call:
## matchit(formula = trt ~ NCHMST + endo + VISBL + priorcdk + HORMONRN,
## data = all, method = "nearest", distance = "glm", link = "logit",
## exact = ~HORMONRN, ratio = 2)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.1443 0.0435 0.6234 10.9978 0.2330
## NCHMST 1.7222 1.4237 0.2779 4.0543 0.1030
## endo 1.5000 2.0763 -0.5248 1.3690 0.1441
## VISBLN 0.1667 0.1102 0.1516 . 0.0565
## VISBLY 0.8333 0.8898 -0.1516 . 0.0565
## priorcdkOther 0.2222 0.3672 -0.3488 . 0.1450
## priorcdkYes 0.7778 0.6328 0.3488 . 0.1450
## HORMONRN 0.8889 0.8983 -0.0300 . 0.0094
## eCDF Max
## distance 0.4492
## NCHMST 0.2053
## endo 0.2316
## VISBLN 0.0565
## VISBLY 0.0565
## priorcdkOther 0.1450
## priorcdkYes 0.1450
## HORMONRN 0.0094
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.1443 0.1155 0.1780 2.9294 0.0163
## NCHMST 1.7222 1.6389 0.0776 3.2811 0.0722
## endo 1.5000 1.5278 -0.0253 1.2068 0.0347
## VISBLN 0.1667 0.1389 0.0745 . 0.0278
## VISBLY 0.8333 0.8611 -0.0745 . 0.0278
## priorcdkOther 0.2222 0.2222 0.0000 . 0.0000
## priorcdkYes 0.7778 0.7778 0.0000 . 0.0000
## HORMONRN 0.8889 0.8889 0.0000 . 0.0000
## eCDF Max Std. Pair Dist.
## distance 0.1667 0.2122
## NCHMST 0.1667 0.4397
## endo 0.0833 0.3794
## VISBLN 0.0278 0.6708
## VISBLY 0.0278 0.6708
## priorcdkOther 0.0000 0.2222
## priorcdkYes 0.0000 0.2222
## HORMONRN 0.0000 0.0000
##
## Sample Sizes:
## Control Treated
## All 354 18
## Matched 36 18
## Unmatched 318 0
## Discarded 0 0
b <- bal.tab(
m.out,
un = TRUE,
binary = "std", # force standardized differences for binary vars
continuous = "std" # explicit, though this is already the default
)
love.plot(
b,
stats = "mean.diffs",
abs = TRUE,
threshold = 0.1,
title="1:2 nearest matching"
)
## using matched set to estimate trt effect using g computation
fit_orr_m <- glm(
ORR ~ trt * (NCHMST+endo+VISBL+priorcdk+HORMONRN),
data = matched_data,
family = binomial(),
weights = matched_data$weights
)
eff_orr_m <- avg_comparisons(
fit_orr_m,
variables = "trt",
newdata = subset(trt == 1),
type = "response",
vcov = ~subclass
)
eff_orr_m
##
## Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
## 0.00449 0.0974 0.0461 0.963 0.1 -0.186 0.195
##
## Term: trt
## Type: response
## Comparison: 1 - 0
m.out <- matchit(
trt ~ NCHMST+endo+VISBL+priorcdk+HORMONRN,
data = all,
method = "nearest", # nearest neighbor matching
distance = "glm", # logistic PS
link="logit",
exact = ~ HORMONRN, # 🔑 enforce exact matching
ratio = 3, # e.g., 1:3 matching
#caliper = 0.2 # optional (on logit scale)
)
#obtain the matched dataset
matched_data <- match.data(m.out)
#check covariate balance
summary(m.out)
##
## Call:
## matchit(formula = trt ~ NCHMST + endo + VISBL + priorcdk + HORMONRN,
## data = all, method = "nearest", distance = "glm", link = "logit",
## exact = ~HORMONRN, ratio = 3)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.1443 0.0435 0.6234 10.9978 0.2330
## NCHMST 1.7222 1.4237 0.2779 4.0543 0.1030
## endo 1.5000 2.0763 -0.5248 1.3690 0.1441
## VISBLN 0.1667 0.1102 0.1516 . 0.0565
## VISBLY 0.8333 0.8898 -0.1516 . 0.0565
## priorcdkOther 0.2222 0.3672 -0.3488 . 0.1450
## priorcdkYes 0.7778 0.6328 0.3488 . 0.1450
## HORMONRN 0.8889 0.8983 -0.0300 . 0.0094
## eCDF Max
## distance 0.4492
## NCHMST 0.2053
## endo 0.2316
## VISBLN 0.0565
## VISBLY 0.0565
## priorcdkOther 0.1450
## priorcdkYes 0.1450
## HORMONRN 0.0094
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.1443 0.1048 0.2442 3.6456 0.0262
## NCHMST 1.7222 1.5370 0.1724 3.1474 0.0741
## endo 1.5000 1.5000 0.0000 1.2911 0.0463
## VISBLN 0.1667 0.1111 0.1491 . 0.0556
## VISBLY 0.8333 0.8889 -0.1491 . 0.0556
## priorcdkOther 0.2222 0.2222 0.0000 . 0.0000
## priorcdkYes 0.7778 0.7778 0.0000 . 0.0000
## HORMONRN 0.8889 0.8889 0.0000 . 0.0000
## eCDF Max Std. Pair Dist.
## distance 0.1852 0.2674
## NCHMST 0.1852 0.4828
## endo 0.0926 0.3704
## VISBLN 0.0556 0.5466
## VISBLY 0.0556 0.5466
## priorcdkOther 0.0000 0.2222
## priorcdkYes 0.0000 0.2222
## HORMONRN 0.0000 0.0000
##
## Sample Sizes:
## Control Treated
## All 354 18
## Matched 54 18
## Unmatched 300 0
## Discarded 0 0
b <- bal.tab(
m.out,
un = TRUE,
binary = "std", # force standardized differences for binary vars
continuous = "std" # explicit, though this is already the default
)
love.plot(
b,
stats = "mean.diffs",
abs = TRUE,
threshold = 0.1,
title="1:3 nearest matching"
)
## using matched set to estimate trt effect using g computation
fit_orr_m <- glm(
ORR ~ trt * (NCHMST+endo+VISBL+priorcdk+HORMONRN),
data = matched_data,
family = binomial(),
weights = matched_data$weights
)
eff_orr_m <- avg_comparisons(
fit_orr_m,
variables = "trt",
newdata = subset(trt == 1),
type = "response",
vcov = ~subclass
)
eff_orr_m
##
## Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
## -0.02 0.0988 -0.203 0.84 0.3 -0.214 0.174
##
## Term: trt
## Type: response
## Comparison: 1 - 0
m.out <- matchit(
trt ~NCHMST+endo+VISBL+priorcdk+HORMONRN,
data = all,
method = "optimal", # nearest neighbor matching
distance = "glm", # logistic PS
link="logit",
exact = ~ HORMONRN, # 🔑 enforce exact matching
ratio=1
#caliper = 0.2 # optional (on logit scale)
)
#obtain the matched dataset
matched_data <- match.data(m.out)
#check covariate balance
summary(m.out)
##
## Call:
## matchit(formula = trt ~ NCHMST + endo + VISBL + priorcdk + HORMONRN,
## data = all, method = "optimal", distance = "glm", link = "logit",
## exact = ~HORMONRN, ratio = 1)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.1443 0.0435 0.6234 10.9978 0.2330
## NCHMST 1.7222 1.4237 0.2779 4.0543 0.1030
## endo 1.5000 2.0763 -0.5248 1.3690 0.1441
## VISBLN 0.1667 0.1102 0.1516 . 0.0565
## VISBLY 0.8333 0.8898 -0.1516 . 0.0565
## priorcdkOther 0.2222 0.3672 -0.3488 . 0.1450
## priorcdkYes 0.7778 0.6328 0.3488 . 0.1450
## HORMONRN 0.8889 0.8983 -0.0300 . 0.0094
## eCDF Max
## distance 0.4492
## NCHMST 0.2053
## endo 0.2316
## VISBLN 0.0565
## VISBLY 0.0565
## priorcdkOther 0.1450
## priorcdkYes 0.1450
## HORMONRN 0.0094
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.1443 0.1169 0.1690 2.5248 0.0145
## NCHMST 1.7222 1.6667 0.0517 3.2685 0.0778
## endo 1.5000 1.6111 -0.1012 1.0110 0.0278
## VISBLN 0.1667 0.2222 -0.1491 . 0.0556
## VISBLY 0.8333 0.7778 0.1491 . 0.0556
## priorcdkOther 0.2222 0.2222 0.0000 . 0.0000
## priorcdkYes 0.7778 0.7778 0.0000 . 0.0000
## HORMONRN 0.8889 0.8889 0.0000 . 0.0000
## eCDF Max Std. Pair Dist.
## distance 0.1111 0.1709
## NCHMST 0.1667 0.5690
## endo 0.0556 0.3035
## VISBLN 0.0556 0.4472
## VISBLY 0.0556 0.4472
## priorcdkOther 0.0000 0.2222
## priorcdkYes 0.0000 0.2222
## HORMONRN 0.0000 0.0000
##
## Sample Sizes:
## Control Treated
## All 354 18
## Matched 18 18
## Unmatched 336 0
## Discarded 0 0
b <- bal.tab(
m.out,
un = TRUE,
binary = "std", # force standardized differences for binary vars
continuous = "std" # explicit, though this is already the default
)
love.plot(
b,
stats = "mean.diffs",
abs = TRUE,
threshold = 0.05,
title="1:1 optimal matching"
)
## using matched set to estimate trt effect using g computation
fit_orr_m <- glm(
ORR ~ trt * (NCHMST+endo+VISBL+priorcdk+HORMONRN),
data = matched_data,
family = binomial(),
weights = matched_data$weights
)
eff_orr_m <- avg_comparisons(
fit_orr_m,
variables = "trt",
newdata = subset(trt == 1),
type = "response",
vcov = ~subclass
)
eff_orr_m
##
## Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
## -0.086 0.124 -0.692 0.489 1.0 -0.329 0.157
##
## Term: trt
## Type: response
## Comparison: 1 - 0
m.out <- matchit(
trt ~NCHMST+endo+VISBL+priorcdk+HORMONRN,
data = all,
method = "optimal", # nearest neighbor matching
distance = "glm", # logistic PS
link="logit",
exact = ~ HORMONRN, # 🔑 enforce exact matching
ratio=2
#caliper = 0.2 # optional (on logit scale)
)
#obtain the matched dataset
matched_data <- match.data(m.out)
#check covariate balance
summary(m.out)
##
## Call:
## matchit(formula = trt ~ NCHMST + endo + VISBL + priorcdk + HORMONRN,
## data = all, method = "optimal", distance = "glm", link = "logit",
## exact = ~HORMONRN, ratio = 2)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.1443 0.0435 0.6234 10.9978 0.2330
## NCHMST 1.7222 1.4237 0.2779 4.0543 0.1030
## endo 1.5000 2.0763 -0.5248 1.3690 0.1441
## VISBLN 0.1667 0.1102 0.1516 . 0.0565
## VISBLY 0.8333 0.8898 -0.1516 . 0.0565
## priorcdkOther 0.2222 0.3672 -0.3488 . 0.1450
## priorcdkYes 0.7778 0.6328 0.3488 . 0.1450
## HORMONRN 0.8889 0.8983 -0.0300 . 0.0094
## eCDF Max
## distance 0.4492
## NCHMST 0.2053
## endo 0.2316
## VISBLN 0.0565
## VISBLY 0.0565
## priorcdkOther 0.1450
## priorcdkYes 0.1450
## HORMONRN 0.0094
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.1443 0.1153 0.1793 2.9162 0.0169
## NCHMST 1.7222 1.6667 0.0517 3.3647 0.0778
## endo 1.5000 1.5278 -0.0253 1.2800 0.0347
## VISBLN 0.1667 0.1667 0.0000 . 0.0000
## VISBLY 0.8333 0.8333 0.0000 . 0.0000
## priorcdkOther 0.2222 0.2500 -0.0668 . 0.0278
## priorcdkYes 0.7778 0.7500 0.0668 . 0.0278
## HORMONRN 0.8889 0.8889 0.0000 . 0.0000
## eCDF Max Std. Pair Dist.
## distance 0.1667 0.2080
## NCHMST 0.1667 0.4655
## endo 0.0833 0.3794
## VISBLN 0.0000 0.2222
## VISBLY 0.0000 0.2222
## priorcdkOther 0.0278 0.6013
## priorcdkYes 0.0278 0.6013
## HORMONRN 0.0000 0.0000
##
## Sample Sizes:
## Control Treated
## All 354 18
## Matched 36 18
## Unmatched 318 0
## Discarded 0 0
b <- bal.tab(
m.out,
un = TRUE,
binary = "std", # force standardized differences for binary vars
continuous = "std" # explicit, though this is already the default
)
love.plot(
b,
stats = "mean.diffs",
abs = TRUE,
threshold = 0.05,
title="1:2 optimal matching"
)
## using matched set to estimate trt effect using g computation
fit_orr_m <- glm(
ORR ~ trt * (NCHMST+endo+VISBL+priorcdk+HORMONRN),
data = matched_data,
family = binomial(),
weights = matched_data$weights
)
eff_orr_m <- avg_comparisons(
fit_orr_m,
variables = "trt",
newdata = subset(trt == 1),
type = "response",
vcov = ~subclass
)
eff_orr_m
##
## Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
## -0.108 0.0944 -1.15 0.251 2.0 -0.293 0.0767
##
## Term: trt
## Type: response
## Comparison: 1 - 0
m.out <- matchit(
trt ~NCHMST+endo+VISBL+priorcdk+HORMONRN,
data = all,
method = "optimal", # nearest neighbor matching
distance = "glm", # logistic PS
link="logit",
exact = ~ HORMONRN, # 🔑 enforce exact matching
ratio=3
#caliper = 0.2 # optional (on logit scale)
)
#obtain the matched dataset
matched_data <- match.data(m.out)
#check covariate balance
summary(m.out)
##
## Call:
## matchit(formula = trt ~ NCHMST + endo + VISBL + priorcdk + HORMONRN,
## data = all, method = "optimal", distance = "glm", link = "logit",
## exact = ~HORMONRN, ratio = 3)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.1443 0.0435 0.6234 10.9978 0.2330
## NCHMST 1.7222 1.4237 0.2779 4.0543 0.1030
## endo 1.5000 2.0763 -0.5248 1.3690 0.1441
## VISBLN 0.1667 0.1102 0.1516 . 0.0565
## VISBLY 0.8333 0.8898 -0.1516 . 0.0565
## priorcdkOther 0.2222 0.3672 -0.3488 . 0.1450
## priorcdkYes 0.7778 0.6328 0.3488 . 0.1450
## HORMONRN 0.8889 0.8983 -0.0300 . 0.0094
## eCDF Max
## distance 0.4492
## NCHMST 0.2053
## endo 0.2316
## VISBLN 0.0565
## VISBLY 0.0565
## priorcdkOther 0.1450
## priorcdkYes 0.1450
## HORMONRN 0.0094
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.1443 0.1046 0.2453 3.6291 0.0270
## NCHMST 1.7222 1.5741 0.1379 3.1838 0.0667
## endo 1.5000 1.5000 0.0000 1.2911 0.0463
## VISBLN 0.1667 0.1481 0.0497 . 0.0185
## VISBLY 0.8333 0.8519 -0.0497 . 0.0185
## priorcdkOther 0.2222 0.2593 -0.0891 . 0.0370
## priorcdkYes 0.7778 0.7407 0.0891 . 0.0370
## HORMONRN 0.8889 0.8889 0.0000 . 0.0000
## eCDF Max Std. Pair Dist.
## distance 0.1852 0.2471
## NCHMST 0.1852 0.5517
## endo 0.0926 0.3704
## VISBLN 0.0185 0.5466
## VISBLY 0.0185 0.5466
## priorcdkOther 0.0370 0.6236
## priorcdkYes 0.0370 0.6236
## HORMONRN 0.0000 0.0000
##
## Sample Sizes:
## Control Treated
## All 354 18
## Matched 54 18
## Unmatched 300 0
## Discarded 0 0
b <- bal.tab(
m.out,
un = TRUE,
binary = "std", # force standardized differences for binary vars
continuous = "std" # explicit, though this is already the default
)
love.plot(
b,
stats = "mean.diffs",
abs = TRUE,
threshold = 0.05,
title="1:3 optimal matching"
)
## using matched set to estimate trt effect using g computation
fit_orr_m <- glm(
ORR ~ trt * (NCHMST+endo+VISBL+priorcdk+HORMONRN),
data = matched_data,
family = binomial(),
weights = matched_data$weights
)
eff_orr_m <- avg_comparisons(
fit_orr_m,
variables = "trt",
newdata = subset(trt == 1),
type = "response",
vcov = ~subclass
)
eff_orr_m
##
## Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
## -0.0434 0.0875 -0.497 0.619 0.7 -0.215 0.128
##
## Term: trt
## Type: response
## Comparison: 1 - 0