by Ian White and Nicholas Latimer
adaptation for R by Paul Schneider
thanks to Jingyi Xuan for resolving discrepancies with Stata
updated IW 07feb2024
R users may also want to check out the ipcwswitch package
# Function to install and load all the packages used in the course
install_n_load <- function(packages){
for(package in packages){
if(eval(parse(text=paste("require(",package,")")))==0) {
install.packages(package)
}
eval(parse(text=paste("require(",package,")")))
}
}
required_packages = c("haven",
"psych",
"survival",
"flexsurv",
"ggplot2",
"cowplot",
"survminer",
"geepack",
"Hmisc",
"eha",
"rpsftm",
"rms")
options(repos = "https://cran.ma.imperial.ac.uk/") ## without this, Knit fails
# # clear work space from previous results
rm(list = setdiff(ls(),c("install_n_load","required_packages")))
# # load required packages
install_n_load(required_packages)
# # read data
novCEA.baseline = as.data.frame(read_dta("NovCEAevents.dta"))
novCEA.tdcs = as.data.frame(read_dta("NovCEAtdcs.dta"))
novCEA = merge(novCEA.baseline,novCEA.tdcs,by="id")
# # get rid of 43 obs apparently beyond the end of follow-up
# # (none of these have events)
novCEA = novCEA[novCEA$time < novCEA$admin,]
# # get rid of 7 obs on the day of death
# # (these mess up later code)
novCEA = novCEA[novCEA$time != novCEA$deathtime,]
# # create dummies for being the first and last observation per person
novCEA$lastobs = novCEA$firstobs = 0
for(subj in unique(novCEA$id)){
temp.subset = novCEA[novCEA$id == subj,]
temp.subset$firstobs[which(temp.subset$time == min(temp.subset$time))] = 1
temp.subset$lastobs[which(temp.subset$time == max(temp.subset$time))] = 1
novCEA[novCEA$id == subj,] = temp.subset
}
# # create time-dependent outcome for death in this interval
novCEA$deathtdo = ifelse(novCEA$lastobs & novCEA$deathind,1,0)
# # create time-dependent variable for switching in this interval
switchInInterval = with(novCEA,(xotime>=time) & (xotime < time +21 ) & (xoind))
novCEA$xotdo = ifelse(switchInInterval,1,0)
NAvalue = with(novCEA, (xotime<time) & (xoind))
novCEA$xotdo = ifelse(NAvalue,NA,novCEA$xotdo)
# # create time-dependent variable for time since progression
# # 4 failures occur between time 0 and 21
# # & are implicitly treated as occurring at time 0
# # all other failures occur at multiples of 21 days
failureAtStart = with(novCEA, (progtime<time+21) & (progind==1))
novCEA$progtdc = ifelse(failureAtStart,1,0)
# # create time-dependent variables for time since progression
novCEA$recentprogtdc = with(novCEA,
(progtime<time+21) & (progtime>=time-42) & (progind==1))
novCEA$nowprogtdc = with(novCEA,
(progtime<time+21) & (progtime>=time) & (progind==1))
# # save dataframe
write.csv(novCEA,"NovCEA_IPCWready.csv",row.names = F)
# View(novCEA)
with(novCEA, table(deathtdo,trtrand))
## trtrand
## deathtdo 0 1
## 0 1132 2658
## 1 103 154
with(novCEA, table(xotdo,trtrand))
## trtrand
## xotdo 0 1
## 0 798 2812
## 1 49 0
with(novCEA, table(progtdc,trtrand))
## trtrand
## progtdc 0 1
## 0 557 1909
## 1 678 903
with(novCEA, table(recentprogtdc,trtrand))
## trtrand
## recentprogtdc 0 1
## FALSE 949 2364
## TRUE 286 448
with(novCEA, table(nowprogtdc,trtrand))
## trtrand
## nowprogtdc 0 1
## FALSE 1130 2649
## TRUE 105 163
Let’s see how many observations we have of each sort.
with(novCEA, table(deathtdo, xotdo, trtrand, useNA="ifany"))
## , , trtrand = 0
##
## xotdo
## deathtdo 0 1 <NA>
## 0 742 48 342
## 1 56 1 46
##
## , , trtrand = 1
##
## xotdo
## deathtdo 0 1 <NA>
## 0 2658 0 0
## 1 154 0 0
# # check we get exactly the same ITT results as in the previous practical using the reformat data
novCEA.itt = novCEA
novCEA.itt$tin = novCEA$time
novCEA.itt$tout = pmin((novCEA.itt$time + 21), novCEA.itt$deathtime)
novSurv.itt = Surv(time=novCEA.itt$tin, # now with intervall time1
time2=novCEA.itt$tout, # time2
event = novCEA.itt$deathtdo)
coxitt = coxph(novSurv.itt ~ trtrand, data = novCEA.itt, ties=("breslow"))
coxitt
## Call:
## coxph(formula = novSurv.itt ~ trtrand, data = novCEA.itt, ties = ("breslow"))
##
## coef exp(coef) se(coef) z p
## trtrand -0.5221 0.5933 0.1286 -4.059 4.92e-05
##
## Likelihood ratio test=15.8 on 1 df, p=7.044e-05
## n= 4047, number of events= 257
exp(confint(coxitt))
## 2.5 % 97.5 %
## trtrand 0.4610846 0.7633627
# # check we get exactly the same PP results as in the previous practical using the reformat data
novCEA.pp = novCEA[novCEA$xotdo==0,]
novCEA.pp$tin = novCEA.pp$time
novCEA.pp$tout = pmin((novCEA.pp$time + 21), novCEA.pp$deathtime)
novSurv.pp = Surv(time=novCEA.pp$tin, # now with intervall time1
time2=novCEA.pp$tout, # time2
event = novCEA.pp$deathtdo)
coxpp = coxph(novSurv.pp ~ trtrand, data = novCEA.pp,ties=("breslow"))
coxpp
## Call:
## coxph(formula = novSurv.pp ~ trtrand, data = novCEA.pp, ties = ("breslow"))
##
## coef exp(coef) se(coef) z p
## trtrand -0.6026 0.5474 0.1659 -3.633 0.00028
##
## Likelihood ratio test=12.2 on 1 df, p=0.0004789
## n= 3610, number of events= 210
## (388 observations deleted due to missingness)
exp(confint(coxpp))
## 2.5 % 97.5 %
## trtrand 0.3954296 0.7576677
<br><br>
(WE WILL USE LINEAR AND QUADRATIC TERMS IN TIME. A BETTER APPROACH USING SPLINES IS GIVEN LATER)
fitBLswitch_lin = glm(xotdo ~ time, family = binomial(link = 'logit'), data = novCEA[novCEA$trtrand == 0,])
summary(fitBLswitch_lin)
##
## Call:
## glm(formula = xotdo ~ time, family = binomial(link = "logit"),
## data = novCEA[novCEA$trtrand == 0, ])
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.142647 0.217139 -14.473 <2e-16 ***
## time 0.003117 0.001233 2.528 0.0115 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 374.40 on 846 degrees of freedom
## Residual deviance: 368.79 on 845 degrees of freedom
## (388 observations deleted due to missingness)
## AIC: 372.79
##
## Number of Fisher Scoring iterations: 5
# # odds ratios
data.frame(OR =exp(fitBLswitch_lin$coefficients),
CI95_lower = exp(confint(fitBLswitch_lin))[,1],
CI95_upper = exp(confint(fitBLswitch_lin))[,2])
## OR CI95_lower CI95_upper
## (Intercept) 0.04316837 0.02771498 0.06505688
## time 1.00312226 1.00056958 1.00545542
logLik(fitBLswitch_lin)
## 'log Lik.' -184.3937 (df=2)
AIC(fitBLswitch_lin)
## [1] 372.7874
BIC(fitBLswitch_lin)
## [1] 382.2708
# plot(fitBLswitch_lin)
# linear model is quite poor, fit time-squared model
novCEA$time2 <- novCEA$time^2
fitBLswitch = glm(xotdo ~ time + time2, family = binomial(link = 'logit'), data = novCEA[novCEA$trtrand == 0,])
summary(fitBLswitch)
##
## Call:
## glm(formula = xotdo ~ time + time2, family = binomial(link = "logit"),
## data = novCEA[novCEA$trtrand == 0, ])
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.998e+00 7.329e-01 -8.183 2.77e-16 ***
## time 5.193e-02 1.084e-02 4.791 1.66e-06 ***
## time2 -1.540e-04 3.777e-05 -4.076 4.58e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 374.40 on 846 degrees of freedom
## Residual deviance: 323.58 on 844 degrees of freedom
## (388 observations deleted due to missingness)
## AIC: 329.58
##
## Number of Fisher Scoring iterations: 8
# # odds ratios
data.frame(OR =exp(fitBLswitch$coefficients),
CI95_lower = exp(confint(fitBLswitch))[,1],
CI95_upper = exp(confint(fitBLswitch))[,2])
## OR CI95_lower CI95_upper
## (Intercept) 0.002484837 0.0004912639 0.008887927
## time 1.053306287 1.0334692053 1.078624063
## time2 0.999846055 0.9997623390 0.999911077
logLik(fitBLswitch)
## 'log Lik.' -161.7875 (df=3)
AIC(fitBLswitch)
## [1] 329.575
BIC(fitBLswitch)
## [1] 343.8001
plot(fitBLswitch)
# here's a way to do the Hosmer-Lemeshow test
install.packages("glmtoolbox")
## Installing package into 'C:/Rlibrary/4.3'
## (as 'lib' is unspecified)
## package 'glmtoolbox' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\rmjwiww\AppData\Local\Temp\RtmpmGGePQ\downloaded_packages
library(glmtoolbox)
## Warning: package 'glmtoolbox' was built under R version 4.3.2
##
## Attaching package: 'glmtoolbox'
## The following object is masked from 'package:geepack':
##
## QIC
hltest(fitBLswitch_lin)
##
## The Hosmer-Lemeshow goodness-of-fit test
##
## Group Size Observed Expected
## 1 108 0 4.469254
## 2 106 0 4.670186
## 3 104 1 4.877533
## 4 97 1 4.841657
## 5 92 14 4.886268
## 6 73 8 4.124637
## 7 108 9 6.680540
## 8 79 12 5.635539
## 9 80 4 8.814386
##
## Statistic = 49.35495
## degrees of freedom = 7
## p-value = 1.9331e-08
hltest(fitBLswitch)
##
## The Hosmer-Lemeshow goodness-of-fit test
##
## Group Size Observed Expected
## 1 135 0 0.2711346
## 2 110 0 0.7380215
## 3 111 1 1.7667569
## 4 104 1 3.4516483
## 5 101 15 6.0004932
## 6 84 9 7.7132139
## 7 92 7 11.5471781
## 8 110 16 17.5115534
##
## Statistic = 19.94313
## degrees of freedom = 6
## p-value = 0.0028347
# quadratic model is better but could still be improved
# we'll use it
<br><br>
novCEA$pxo1 = predict(fitBLswitch,newdata =novCEA, type="response")
summary(novCEA$pxo1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000000 0.006862 0.034332 0.060453 0.130327 0.165495
fitBLTDswitch = glm(xotdo ~ prognosisb + CEAb + HRQLb + nowprogtdc + CEAtdc + HRQLtdc + time + time2,
family = binomial(link = 'logit'),
data = novCEA[novCEA$trtrand == 0 & novCEA$recentprogtdc == 1,])
summary(fitBLTDswitch)
##
## Call:
## glm(formula = xotdo ~ prognosisb + CEAb + HRQLb + nowprogtdc +
## CEAtdc + HRQLtdc + time + time2, family = binomial(link = "logit"),
## data = novCEA[novCEA$trtrand == 0 & novCEA$recentprogtdc ==
## 1, ])
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.7847760 5.5036508 -1.233 0.21766
## prognosisb -2.7384749 2.8305719 -0.967 0.33331
## CEAb 0.1388221 0.3785942 0.367 0.71386
## HRQLb 0.8469678 2.2817367 0.371 0.71049
## nowprogtdcTRUE 1.6508039 0.5373881 3.072 0.00213 **
## CEAtdc -0.0900447 0.2717116 -0.331 0.74034
## HRQLtdc -0.1055115 2.6130755 -0.040 0.96779
## time 0.0620480 0.0303804 2.042 0.04112 *
## time2 -0.0001136 0.0001101 -1.032 0.30210
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 223.83 on 201 degrees of freedom
## Residual deviance: 108.06 on 193 degrees of freedom
## (84 observations deleted due to missingness)
## AIC: 126.06
##
## Number of Fisher Scoring iterations: 6
data.frame(OR =exp(fitBLTDswitch$coefficients),
CI95_lower = exp(confint(fitBLTDswitch))[,1],
CI95_upper = exp(confint(fitBLTDswitch))[,2])
## OR CI95_lower CI95_upper
## (Intercept) 0.001130861 1.554144e-08 45.461215
## prognosisb 0.064668895 2.110449e-04 13.199732
## CEAb 1.148919701 5.490330e-01 2.451344
## HRQLb 2.332563388 2.561079e-02 217.361999
## nowprogtdcTRUE 5.211167519 1.872867e+00 15.700423
## CEAtdc 0.913890305 5.397361e-01 1.561309
## HRQLtdc 0.899864111 4.908600e-03 149.945607
## time 1.064013403 1.006283e+00 1.131241
## time2 0.999886440 9.996772e-01 1.000093
logLik(fitBLTDswitch)
## 'log Lik.' -54.03214 (df=9)
AIC(fitBLTDswitch)
## [1] 126.0643
BIC(fitBLTDswitch)
## [1] 155.8387
hltest(fitBLTDswitch)
##
## The Hosmer-Lemeshow goodness-of-fit test
##
## Group Size Observed Expected
## 1 20 0 0.07463176
## 2 20 1 0.15648071
## 3 20 0 0.27618149
## 4 20 0 0.37600229
## 5 20 1 0.64665168
## 6 20 2 1.55539425
## 7 20 0 3.92791273
## 8 20 9 8.18273190
## 9 20 19 13.53247547
## 10 20 15 18.31513584
## 11 2 2 1.95640188
##
## Statistic = 24.68309
## degrees of freedom = 9
## p-value = 0.0033425
novCEA$pxo2 = predict(fitBLTDswitch, newdata = novCEA,type="response")
# # set prob of switching to zero where progtypetdc==0
novCEA$pxo2[novCEA$trtrand==0 & novCEA$recentprogtdc ==0] = 0
# # explore fitted probabilities
ggplot(novCEA) +
geom_point(aes(x= time, y= pxo1, col = "given baseline covariates")) +
geom_point(aes(x= time, y= pxo2,col="given baseline & time-updated covariates")) +
theme_minimal() +
theme(legend.position = "bottom") +
ylab("Probability of switch") +
xlab("Time") +
guides(fill=guide_legend(title=""))
novCEA$denom = novCEA$num = NA
for(subj in unique(novCEA$id)){
temp.subset = novCEA[novCEA$id == subj,]
temp.subset = temp.subset[order(temp.subset$time),]
temp.subset$num[1] = 1 - temp.subset$pxo1[1]
temp.subset$denom[1] = 1 - temp.subset$pxo2[1]
len = length(temp.subset$num)
if(len>1){
for(i in 2:len){
temp.subset$num[i] = temp.subset$num[i-1] * (1-temp.subset$pxo1[i])
temp.subset$denom[i] = temp.subset$denom[i-1] * (1-temp.subset$pxo2[i])
}
}
novCEA[novCEA$id == subj,] = temp.subset
}
isControl = novCEA$trtrand==0
novCEA$weight[isControl] = 1 / novCEA$denom[isControl]
novCEA$sweight[isControl] = novCEA$num[isControl] / novCEA$denom[isControl]
# # Unstablised Weights:
summary(novCEA$weight[novCEA$xotdo==0])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 1.000 1.000 1.135 1.021 11.525 3200
# # Stabilised Weights:
summary(novCEA$sweight[novCEA$xotdo==0])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.283 0.694 0.941 0.885 0.991 6.753 3200
plot_grid(
ggplot(novCEA[novCEA$xotdo==0,]) +
geom_histogram(aes(weight)) +
theme_minimal()
,
ggplot(novCEA[novCEA$xotdo==0,]) +
geom_histogram(aes(sweight)) +
theme_minimal()
)
novCEA$weight[!isControl] = 1
novCEA$sweight[!isControl] = 1
Estimate the KM graph and IPCW hazard ratio using Cox regression.
##### data preparation
novCEA.KM = novCEA[novCEA$xotdo==0,]
novCEA.KM$tin = novCEA.KM$time
novCEA.KM$tout = pmin((novCEA.KM$time + 21), novCEA.KM$deathtime)
novSurv.KM = Surv(time=novCEA.KM$tin, # now with intervall time1
time2=novCEA.KM$tout, # time2
event = novCEA.KM$deathtdo)
# # NOWEIGHTS (for comparison)
novSurv.KM.fit.NOweights = survfit(novSurv.KM ~ trtrand, data = novCEA.KM)
ggsurvplot(novSurv.KM.fit.NOweights,risk.table = T) +
ggtitle("UNWEIGHTED Results")
# # unstabilised weights
novSurv.KM.fit.weighted = survfit(novSurv.KM ~ trtrand, data = novCEA.KM, weights = weight)
# # plot
ggsurvplot(novSurv.KM.fit.weighted,risk.table = T) +
ggtitle("Results with unstabilised weights")
# cox regression
cox.wu = coxph(novSurv.KM ~ trtrand, data = novCEA.KM, weights = weight, ties = ("breslow"), cluster=id)
cox.wu
## Call:
## coxph(formula = novSurv.KM ~ trtrand, data = novCEA.KM, weights = weight,
## ties = ("breslow"), cluster = id)
##
## coef exp(coef) se(coef) robust se z p
## trtrand -0.6995 0.4968 0.1519 0.1731 -4.042 5.31e-05
##
## Likelihood ratio test=19.78 on 1 df, p=8.706e-06
## n= 3610, number of events= 210
## (388 observations deleted due to missingness)
exp(confint(cox.wu))
## 2.5 % 97.5 %
## trtrand 0.3539187 0.6974928
# I'm told that the correct way to get sandwich variances is not the above but
# install.packages(c("sandwich","lmtest"))
# library(sandwich)
# library(lmtest)
# coeftest(cox.wu, vcov = sandwich, cluster = ~id)
# This agrees somewhat better with the Stata results
# Thanks to Amy Chang for this
# # STABILISED WEIGHTS
novSurv.KM.fit.weighted2 = survfit(novSurv.KM ~ trtrand, data = novCEA.KM, weights = sweight)
# # plot
ggsurvplot(novSurv.KM.fit.weighted2,risk.table = T) +
ggtitle("Results with stabilised weights")
# cox regression
cox.ws = coxph(novSurv.KM ~ trtrand, data = novCEA.KM, weights = sweight,ties = ("breslow"), cluster=id)
cox.ws
## Call:
## coxph(formula = novSurv.KM ~ trtrand, data = novCEA.KM, weights = sweight,
## ties = ("breslow"), cluster = id)
##
## coef exp(coef) se(coef) robust se z p
## trtrand -0.7041 0.4945 0.1843 0.1794 -3.926 8.65e-05
##
## Likelihood ratio test=13.55 on 1 df, p=0.0002329
## n= 3610, number of events= 210
## (388 observations deleted due to missingness)
exp(confint(cox.ws))
## 2.5 % 97.5 %
## trtrand 0.3479575 0.702884
# Cox models could optionally adjust for covariates
Note that the cluster-robust variance estimator (and hence
the CI) differs slightly from those in the Stata analysis.
# remove previously computed vars
novCEA$pxo2 = novCEA$pxo1 = novCEA$weight = novCEA$sweight = NULL
First create splines for a time-dependent intercept Generate 5 knots based upon the event time distribution (can try other numbers of knots)
knots.R = rcspline.eval(novCEA$time[novCEA$xotdo==1], knots.only=T, pc=T, nk=6)
knots.R
## [1] 84 105 147 168 210
# Stata creates slightly different knots, we will use those
knots.Stata = c(42,84,126,168,273)
fitBLswitch.sp = glm(xotdo ~ rcs(time,knots.Stata), family = binomial(link="logit"), data = novCEA[novCEA$trtrand==0,])
summary(fitBLswitch.sp)
##
## Call:
## glm(formula = xotdo ~ rcs(time, knots.Stata), family = binomial(link = "logit"),
## data = novCEA[novCEA$trtrand == 0, ])
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -11.06698 2.59159 -4.270 1.95e-05 ***
## rcs(time, knots.Stata)time 0.12374 0.03651 3.390 0.00070 ***
## rcs(time, knots.Stata)time' -0.91563 0.28757 -3.184 0.00145 **
## rcs(time, knots.Stata)time'' 2.31678 0.74610 3.105 0.00190 **
## rcs(time, knots.Stata)time''' -2.08256 0.71362 -2.918 0.00352 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 374.40 on 846 degrees of freedom
## Residual deviance: 310.93 on 842 degrees of freedom
## (388 observations deleted due to missingness)
## AIC: 320.93
##
## Number of Fisher Scoring iterations: 9
# # odds ratios
data.frame(OR = exp(fitBLswitch.sp$coefficients),
CI95_lower = exp(confint(fitBLswitch.sp))[,1],
CI95_upper = exp(confint(fitBLswitch.sp))[,2])
## OR CI95_lower CI95_upper
## (Intercept) 1.561970e-05 3.220756e-08 0.000903275
## rcs(time, knots.Stata)time 1.131721e+00 1.067002e+00 1.232348027
## rcs(time, knots.Stata)time' 4.002642e-01 2.119126e-01 0.658349286
## rcs(time, knots.Stata)time'' 1.014291e+01 2.694118e+00 50.877125515
## rcs(time, knots.Stata)time''' 1.246110e-01 2.781889e-02 0.460305065
logLik(fitBLswitch.sp)
## 'log Lik.' -155.4667 (df=5)
AIC(fitBLswitch.sp)
## [1] 320.9334
BIC(fitBLswitch.sp)
## [1] 344.6419
plot(fitBLswitch.sp)
novCEA$pxo1 = predict(fitBLswitch.sp, newdata = novCEA, type="response")
fitBLTDswitch.sp = glm(xotdo ~ prognosisb + CEAb + HRQLb + nowprogtdc + CEAtdc + HRQLtdc + rcs(time, knots.Stata), family = binomial(link = 'logit'), data = novCEA[novCEA$trtrand == 0 & novCEA$recentprogtdc > 0,])
summary(fitBLTDswitch.sp)
##
## Call:
## glm(formula = xotdo ~ prognosisb + CEAb + HRQLb + nowprogtdc +
## CEAtdc + HRQLtdc + rcs(time, knots.Stata), family = binomial(link = "logit"),
## data = novCEA[novCEA$trtrand == 0 & novCEA$recentprogtdc >
## 0, ])
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -12.7357 6.7545 -1.886 0.05936 .
## prognosisb -1.9575 2.9281 -0.669 0.50381
## CEAb 0.2772 0.3994 0.694 0.48767
## HRQLb 1.0984 2.3283 0.472 0.63710
## nowprogtdcTRUE 1.7569 0.5410 3.247 0.00117 **
## CEAtdc -0.1756 0.2811 -0.625 0.53219
## HRQLtdc -0.2904 2.6503 -0.110 0.91275
## rcs(time, knots.Stata)time 0.1330 0.0574 2.317 0.02053 *
## rcs(time, knots.Stata)time' -0.8992 0.4701 -1.913 0.05579 .
## rcs(time, knots.Stata)time'' 2.5201 1.3051 1.931 0.05348 .
## rcs(time, knots.Stata)time''' -2.7147 1.4359 -1.891 0.05867 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 223.83 on 201 degrees of freedom
## Residual deviance: 103.82 on 191 degrees of freedom
## (84 observations deleted due to missingness)
## AIC: 125.82
##
## Number of Fisher Scoring iterations: 7
# # odds ratios
data.frame(OR =exp(fitBLTDswitch.sp$coefficients),
CI95_lower = exp(confint(fitBLTDswitch.sp))[,1],
CI95_upper = exp(confint(fitBLTDswitch.sp))[,2])
## OR CI95_lower CI95_upper
## (Intercept) 2.944021e-06 1.963261e-12 0.8180748
## prognosisb 1.412173e-01 4.039642e-04 42.8982655
## CEAb 1.319398e+00 6.103808e-01 2.9712651
## HRQLb 2.999312e+00 3.032446e-02 310.9014078
## nowprogtdcTRUE 5.794460e+00 2.070176e+00 17.6058457
## CEAtdc 8.389534e-01 4.773519e-01 1.4520040
## HRQLtdc 7.479650e-01 3.733854e-03 133.7213596
## rcs(time, knots.Stata)time 1.142216e+00 1.039646e+00 1.3011130
## rcs(time, knots.Stata)time' 4.069098e-01 1.456317e-01 0.9199604
## rcs(time, knots.Stata)time'' 1.243016e+01 1.183884e+00 202.3698960
## rcs(time, knots.Stata)time''' 6.622355e-02 3.425509e-03 1.0351274
logLik(fitBLTDswitch.sp)
## 'log Lik.' -51.90815 (df=11)
AIC(fitBLTDswitch.sp)
## [1] 125.8163
BIC(fitBLTDswitch.sp)
## [1] 162.2073
plot(fitBLTDswitch.sp)
novCEA$pxo2 = predict(fitBLTDswitch.sp, newdata = novCEA, type="response")
# # set prob of switching to zero where recentprogtdc==0
novCEA$pxo2[novCEA$trtrand==0 & novCEA$recentprogtdc == 0] = 0
novCEA$denom = novCEA$num = NA
for(subj in unique(novCEA$id)){
temp.subset = novCEA[novCEA$id == subj,]
temp.subset = temp.subset[order(temp.subset$time),]
temp.subset$num[1] = 1 - temp.subset$pxo1[1]
temp.subset$denom[1] = 1 - temp.subset$pxo2[1]
len = length(temp.subset$num)
if(len>1){
for(i in 2:len){
temp.subset$num[i] = temp.subset$num[i-1] * (1-temp.subset$pxo1[i])
temp.subset$denom[i] = temp.subset$denom[i-1] * (1-temp.subset$pxo2[i])
}
}
novCEA[novCEA$id == subj,] = temp.subset
}
isControl = novCEA$trtrand==0
novCEA$weight[isControl] = 1 / novCEA$denom[isControl]
novCEA$sweight[isControl] = novCEA$num[isControl] / novCEA$denom[isControl]
# # Unstablised Weights:
summary(novCEA$weight[novCEA$xotdo==0])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 1.000 1.000 1.114 1.019 7.287 3200
# # Stabilised Weights:
summary(novCEA$sweight[novCEA$xotdo==0])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.274 0.702 0.966 0.871 1.000 4.486 3200
plot_grid(
ggplot(novCEA[novCEA$xotdo==0,]) +
geom_histogram(aes(weight)) +
theme_minimal()
,
ggplot(novCEA[novCEA$xotdo==0,]) +
geom_histogram(aes(sweight)) +
theme_minimal()
)
novCEA$weight[!isControl] = 1
novCEA$sweight[!isControl] = 1
novCEA.KM = novCEA[novCEA$xotdo==0,]
novCEA.KM$tin = novCEA.KM$time
novCEA.KM$tout = pmin((novCEA.KM$time + 21), novCEA.KM$deathtime)
novSurv.KM = Surv(time=novCEA.KM$tin, # now with intervall time1
time2=novCEA.KM$tout, # time2
event = novCEA.KM$deathtdo)
# # # NOWEIGHTS - as comparison (code not run)
# novSurv.KM.fit.NOweights = survfit(novSurv.KM ~ trtrand, data = novCEA.KM)
#
# ggsurvplot(novSurv.KM.fit.NOweights,risk.table = T) +
# ggtitle("UNWEIGHTED Results")
# # UNSTABILISED WEIGHTS
novSurv.KM.fit.weighted = survfit(novSurv.KM ~ trtrand, data = novCEA.KM, weights = weight)
# # plot
ggsurvplot(novSurv.KM.fit.weighted,risk.table = T) +
ggtitle("Results with unstabilised weights")
# cox regression
cox2.wu = coxph(novSurv.KM ~ trtrand, data = novCEA.KM, weights = weight, ties = ("breslow"), cluster=id)
cox2.wu
## Call:
## coxph(formula = novSurv.KM ~ trtrand, data = novCEA.KM, weights = weight,
## ties = ("breslow"), cluster = id)
##
## coef exp(coef) se(coef) robust se z p
## trtrand -0.7146 0.4894 0.1511 0.1699 -4.207 2.59e-05
##
## Likelihood ratio test=20.76 on 1 df, p=5.214e-06
## n= 3610, number of events= 210
## (388 observations deleted due to missingness)
exp(confint(cox2.wu))
## 2.5 % 97.5 %
## trtrand 0.3508025 0.6827413
# # STABILISED WEIGHTS
novSurv.KM.fit.weighted2 = survfit(novSurv.KM ~ trtrand, data = novCEA.KM, weights = sweight)
# # plot
ggsurvplot(novSurv.KM.fit.weighted2,risk.table = T) +
ggtitle("Results with stabilised weights")
# cox regression
cox2.ws = coxph(novSurv.KM ~ trtrand, data = novCEA.KM, weights = sweight, ties = ("breslow"), cluster=id)
cox2.ws
## Call:
## coxph(formula = novSurv.KM ~ trtrand, data = novCEA.KM, weights = sweight,
## ties = ("breslow"), cluster = id)
##
## coef exp(coef) se(coef) robust se z p
## trtrand -0.7249 0.4844 0.1844 0.1764 -4.109 3.97e-05
##
## Likelihood ratio test=14.24 on 1 df, p=0.0001609
## n= 3610, number of events= 210
## (388 observations deleted due to missingness)
exp(confint(cox2.ws))
## 2.5 % 97.5 %
## trtrand 0.3427733 0.6844227