# Explore the data structure using str()
str(htndat)
## 'data.frame': 4998 obs. of 17 variables:
## $ DBP : int 60 75 60 60 60 60 59 60 70 58 ...
## $ SBP : int 90 110 80 90 100 120 100 100 120 110 ...
## $ BMI : num NA 27.3 17.7 19.9 21.3 ...
## $ age : num 28 26.5 43 50.1 30.6 ...
## $ married : int 0 1 0 1 0 1 0 0 1 1 ...
## $ male.gender : int 0 0 0 1 1 0 0 1 0 0 ...
## $ hgb_centered : num NA -3.9 -3.2 NA -0.4 ...
## $ adv_HIV : int NA 0 NA NA NA NA NA NA NA NA ...
## $ survtime : int 338 439 752 526 215 272 1669 388 14 419 ...
## $ event : int 1 1 1 1 1 1 1 1 1 1 ...
## $ arv_naive : int 1 1 1 1 1 1 1 0 1 1 ...
## $ urban.clinic : int 0 1 0 1 0 0 1 0 0 0 ...
## $ log_creat_centered: num NA 5.42e-02 -3.60e-01 NA -1.00e-07 ...
## $ IPW_weight : num 0.924 1.164 0.721 0.829 0.856 ...
## $ SBP_ge120 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ survmonth : num 12 15 26 18 8 10 56 13 1 14 ...
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
# Explore % of missing values by variable
miss <- apply(htndat, 2, function(x) sum(is.na(x))/nrow(htndat)*100)
# Use kable() function from kableExtra package to get a clean table
miss <- miss[miss != 0] %>%
kableExtra::kable(booktabs = TRUE,
digits = 2,
col.names = "",
caption="Missing data percentage among variables in the dataset") %>%
kableExtra::kable_styling(latex_options = c("striped", "HOLD_position")
)
miss
BMI | 13.39 |
married | 3.26 |
hgb_centered | 27.87 |
adv_HIV | 39.22 |
log_creat_centered | 30.75 |
## ----summaries and tables for HTN data -----------------------------------------------------
# Create a summary table
gtsummary::tbl_summary(data = htndat,
by = adv_HIV,
include = c(SBP, BMI, age, married,
male.gender, hgb_centered, event,
arv_naive, urban.clinic,
log_creat_centered,
SBP_ge120, adv_HIV),
label = list(adv_HIV = "Advanced HIV, 1 = Yes",
event = "Event (Death)"),
missing = "ifany",
missing_text = "Missing %",
missing_stat = "{p_miss}") %>%
gtsummary::bold_labels() %>%
gtsummary::add_overall() %>%
gtsummary::add_p()
Characteristic | Overall N = 3,0381 |
0 N = 1,1301 |
1 N = 1,9081 |
p-value2 |
---|---|---|---|---|
SBP | 110 (100, 120) | 110 (100, 120) | 110 (100, 120) | 0.003 |
BMI | 21.0 (18.9, 23.4) | 21.9 (19.7, 24.7) | 20.5 (18.6, 22.8) | <0.001 |
Missing % | 5.4 | 5.5 | 5.3 | |
age | 34 (27, 42) | 31 (26, 38) | 35 (29, 44) | <0.001 |
married | 1,773 (59%) | 693 (62%) | 1,080 (58%) | 0.010 |
Missing % | 1.9 | 1.9 | 1.9 | |
male.gender | 743 (24%) | 206 (18%) | 537 (28%) | <0.001 |
hgb_centered | 0.20 (-1.60, 1.70) | 0.60 (-1.10, 1.80) | -0.20 (-2.00, 1.50) | <0.001 |
Missing % | 12 | 12 | 12 | |
Event (Death) | 398 (13%) | 99 (8.8%) | 299 (16%) | <0.001 |
arv_naive | 2,871 (95%) | 1,057 (94%) | 1,814 (95%) | 0.073 |
urban.clinic | 1,526 (50%) | 573 (51%) | 953 (50%) | 0.7 |
log_creat_centered | -0.09 (-0.28, 0.10) | -0.13 (-0.29, 0.06) | -0.08 (-0.28, 0.12) | 0.002 |
Missing % | 12 | 13 | 12 | |
SBP_ge120 | 174 (5.7%) | 63 (5.6%) | 111 (5.8%) | 0.8 |
1 Median (Q1, Q3); n (%) | ||||
2 Wilcoxon rank sum test; Pearson’s Chi-squared test |
## ---view table of HTN data
# Inspect key variables
str(htndat[c("survmonth", "event")])
## 'data.frame': 4998 obs. of 2 variables:
## $ survmonth: num 12 15 26 18 8 10 56 13 1 14 ...
## $ event : int 1 1 1 1 1 1 1 1 1 1 ...
# 'survmonth': num ...
# 'event' : int ...
## ----Create survival object, fit KM curve
# Create the survival object
surv_obj <- with(htndat, Surv(time = survmonth, event = event))
# Explanation:
# - 'time' = months from baseline to death or censoring
# - 'event' = 1 if death occurred, 0 if censored
# Fit overall KM curve
km_overall <- survfit(surv_obj ~ 1, data = htndat)
print(km_overall)
## Call: survfit(formula = surv_obj ~ 1, data = htndat)
##
## n events median 0.95LCL 0.95UCL
## [1,] 4998 749 NA NA NA
# basic plot
plot(km_overall, xlab="Months", ylab="Survival proportion")
plot(km_overall, xlab="Months", ylab="Survival proportion",
ylim=c(.6,1),
conf.int=FALSE)
# fancy plot
p = ggsurvplot(
km_overall,
data = htndat,
risk.table = TRUE,
conf.int = TRUE,
xlab = "Time (months)",
ylab = "Survival probability",
ggtheme = theme_minimal()
)
print(p)
summary(km_overall)
## Call: survfit(formula = surv_obj ~ 1, data = htndat)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 4998 65 0.987 0.00160 0.984 0.990
## 2 4394 44 0.977 0.00217 0.973 0.981
## 3 4210 43 0.967 0.00263 0.962 0.972
## 4 4066 28 0.960 0.00290 0.955 0.966
## 5 3955 29 0.953 0.00316 0.947 0.960
## 6 3849 26 0.947 0.00338 0.940 0.954
## 7 3741 23 0.941 0.00357 0.934 0.948
## 8 3626 24 0.935 0.00377 0.928 0.942
## 9 3508 18 0.930 0.00391 0.923 0.938
## 10 3397 13 0.927 0.00402 0.919 0.934
## 11 3306 16 0.922 0.00415 0.914 0.930
## 12 3213 21 0.916 0.00433 0.908 0.925
## 13 3130 25 0.909 0.00454 0.900 0.918
## 14 3024 13 0.905 0.00464 0.896 0.914
## 15 2947 21 0.898 0.00482 0.889 0.908
## 16 2852 19 0.892 0.00498 0.883 0.902
## 17 2764 19 0.886 0.00514 0.876 0.896
## 18 2680 17 0.881 0.00528 0.870 0.891
## 19 2609 7 0.878 0.00535 0.868 0.889
## 20 2547 20 0.871 0.00552 0.861 0.882
## 21 2475 7 0.869 0.00558 0.858 0.880
## 22 2410 11 0.865 0.00569 0.854 0.876
## 23 2349 14 0.860 0.00582 0.848 0.871
## 24 2274 10 0.856 0.00591 0.845 0.868
## 25 2217 11 0.852 0.00602 0.840 0.864
## 26 2160 12 0.847 0.00614 0.835 0.859
## 27 2082 7 0.844 0.00621 0.832 0.856
## 28 2018 7 0.841 0.00629 0.829 0.854
## 29 1948 9 0.837 0.00639 0.825 0.850
## 30 1876 9 0.833 0.00650 0.821 0.846
## 31 1805 3 0.832 0.00654 0.819 0.845
## 32 1758 13 0.826 0.00671 0.813 0.839
## 33 1696 6 0.823 0.00679 0.810 0.836
## 34 1644 9 0.818 0.00692 0.805 0.832
## 35 1590 9 0.814 0.00705 0.800 0.828
## 36 1534 4 0.812 0.00711 0.798 0.826
## 37 1477 5 0.809 0.00719 0.795 0.823
## 38 1432 8 0.804 0.00733 0.790 0.819
## 39 1381 10 0.799 0.00750 0.784 0.813
## 40 1321 3 0.797 0.00756 0.782 0.812
## 41 1283 3 0.795 0.00761 0.780 0.810
## 42 1237 6 0.791 0.00774 0.776 0.806
## 45 1119 3 0.789 0.00781 0.774 0.804
## 46 1078 4 0.786 0.00792 0.771 0.802
## 47 1034 11 0.778 0.00823 0.762 0.794
## 48 987 2 0.776 0.00829 0.760 0.792
## 49 944 1 0.775 0.00832 0.759 0.792
## 50 890 7 0.769 0.00857 0.753 0.786
## 51 845 6 0.764 0.00879 0.747 0.781
## 52 806 3 0.761 0.00891 0.744 0.778
## 53 766 7 0.754 0.00921 0.736 0.772
## 54 703 2 0.752 0.00931 0.734 0.770
## 55 653 4 0.747 0.00953 0.729 0.766
## 56 616 6 0.740 0.00989 0.721 0.759
## 57 581 4 0.735 0.01014 0.715 0.755
## 58 536 2 0.732 0.01029 0.712 0.752
## 59 502 1 0.731 0.01037 0.710 0.751
## 60 471 4 0.724 0.01074 0.704 0.746
## 62 396 4 0.717 0.01123 0.695 0.739
## 65 318 1 0.715 0.01142 0.693 0.738
## 66 292 6 0.700 0.01266 0.676 0.725
## 72 155 3 0.687 0.01464 0.658 0.716
## 73 133 1 0.681 0.01541 0.652 0.712
summary(km_overall, times = 12)
## Call: survfit(formula = surv_obj ~ 1, data = htndat)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 12 3213 350 0.916 0.00433 0.908 0.925
quantile(km_overall, probs=c(.05, .10, .25, .5))
## $quantile
## 5 10 25 50
## 6 15 55 NA
##
## $lower
## 5 10 25 50
## 5 13 51 NA
##
## $upper
## 5 10 25 50
## 7 17 60 NA
tbl_survfit(km_overall, probs=c(.05, .10, .25, .5))
Characteristic | 5.0% Percentile | 10% Percentile | 25% Percentile | 50% Percentile |
---|---|---|---|---|
Overall | 6.0 (5.0, 7.0) | 15 (13, 17) | 55 (51, 60) | — (—, —) |
# Hazard functions
fit.hazard <- bshazard(surv_obj ~ 1, data = htndat)
## Iterations: relative error in phi-hat = 1e-04
## phi= 1.414298 sv2= 0.03232137 df= 12.53428 lambda= 43.75735
## phi= 1.453477 sv2= 0.01388223 df= 9.037744 lambda= 104.7005
## phi= 1.477015 sv2= 0.00819069 df= 7.467542 lambda= 180.3286
## phi= 1.495896 sv2= 0.005662076 df= 6.645518 lambda= 264.1958
## phi= 1.511699 sv2= 0.004268779 df= 6.130303 lambda= 354.129
## phi= 1.525176 sv2= 0.003393509 df= 5.766437 lambda= 449.4393
## phi= 1.536825 sv2= 0.002797968 df= 5.488998 lambda= 549.2647
## phi= 1.54695 sv2= 0.002373234 df= 5.26765 lambda= 651.8322
## phi= 1.555726 sv2= 0.002062497 df= 5.087056 lambda= 754.2924
## phi= 1.563251 sv2= 0.001832578 df= 4.938872 lambda= 853.034
## phi= 1.569599 sv2= 0.001662059 df= 4.818013 lambda= 944.3702
## phi= 1.574844 sv2= 0.001535947 df= 4.720765 lambda= 1025.325
## phi= 1.57908 sv2= 0.001443162 df= 4.643867 lambda= 1094.181
## phi= 1.582427 sv2= 0.001375298 df= 4.58417 lambda= 1150.606
## phi= 1.585015 sv2= 0.001325938 df= 4.538627 lambda= 1195.392
## phi= 1.586982 sv2= 0.001290207 df= 4.50441 lambda= 1230.021
## phi= 1.588453 sv2= 0.001264441 df= 4.479026 lambda= 1256.249
## phi= 1.589541 sv2= 0.001245918 df= 4.460384 lambda= 1275.799
## phi= 1.590337 sv2= 0.001232631 df= 4.4468 lambda= 1290.197
## phi= 1.590916 sv2= 0.001223116 df= 4.43696 lambda= 1300.707
## phi= 1.591335 sv2= 0.001216311 df= 4.429863 lambda= 1308.328
## phi= 1.591636 sv2= 0.001211449 df= 4.42476 lambda= 1313.829
## phi= 1.591853 sv2= 0.001207977 df= 4.421101 lambda= 1317.784
## phi= 1.592008 sv2= 0.001205499 df= 4.418481 lambda= 1320.622
## phi= 1.592119 sv2= 0.00120373 df= 4.416608 lambda= 1322.654
## phi= 1.592199 sv2= 0.001202469 df= 4.415269 lambda= 1324.108
## phi= 1.592255 sv2= 0.001201569 df= 4.414313 lambda= 1325.146
## phi= 1.592296 sv2= 0.001200928 df= 4.413631 lambda= 1325.888
## phi= 1.592325 sv2= 0.00120047 df= 4.413144 lambda= 1326.417
## phi= 1.592345 sv2= 0.001200144 df= 4.412797 lambda= 1326.795
## phi= 1.59236 sv2= 0.001199912 df= 4.41255 lambda= 1327.064
## phi= 1.592371 sv2= 0.001199746 df= 4.412373 lambda= 1327.256
## phi= 1.592378 sv2= 0.001199628 df= 4.412247 lambda= 1327.393
## phi= 1.592383 sv2= 0.001199543 df= 4.412157 lambda= 1327.491
# Extract smoothed hazard and log-hazard
time_vals <- fit.hazard$time
hazard <- fit.hazard$hazard
log_hazard <- log(hazard)
## Plot hazard
plot(time_vals, hazard, type = "l", lwd = 2,
xlab = "Time", ylab = "h(t)", main = "Smoothed Hazard Function")
polygon(c(fit.hazard$time, rev(fit.hazard$time)),
c(fit.hazard$upper, rev(fit.hazard$lower)),
col = rgb(0, 0, 1, 0.2), border = NA)
plot(time_vals, log_hazard, type = "l", lwd = 2,
xlab = "Time", ylab = "log h(t)", main = "Smoothed Log-Hazard Function")
km_adv <- survfit(surv_obj ~ factor(adv_HIV), data = htndat)
plot(km_adv, lwd=3, col=c("blue","red"), ylim=c(.6,1) ,
xlab="Time in months", ylab="Survival proportion S(t)",
main="Survival curves by HIV Disease Stage")
legend("topright",
legend = c("Advanced HIV", "Not advanced"),
col = c("red", "blue"),
lwd = 2)
ggsurvplot(
km_adv,
data = htndat,
risk.table = TRUE,
pval = TRUE,
legend.labs= c("Not advanced","Advanced"),
xlab = "Time (months)",
ylab = "Survival probability",
title = "Survival by advanced HIV status",
conf.int = TRUE,
ggtheme = theme_minimal()
)
fit_adv <- bshazard(Surv(time = survmonth, event = event) ~ 1,
data = htndat[htndat$adv_HIV == 1, ])
## Iterations: relative error in phi-hat = 1e-04
## phi= 1.211447 sv2= 0.06181708 df= 10.05581 lambda= 19.59728
## phi= 1.268917 sv2= 0.033306 df= 8.687428 lambda= 38.09876
## phi= 1.307798 sv2= 0.01831245 df= 7.529565 lambda= 71.41576
## phi= 1.338337 sv2= 0.01047754 df= 6.590567 lambda= 127.7338
## phi= 1.366714 sv2= 0.006141416 df= 5.83713 lambda= 222.5406
## phi= 1.39498 sv2= 0.003541221 df= 5.207246 lambda= 393.9264
## phi= 1.423359 sv2= 0.001916264 df= 4.63941 lambda= 742.7782
## phi= 1.451633 sv2= 0.0009413058 df= 4.092872 lambda= 1542.148
## phi= 1.481331 sv2= 0.0004359487 df= 3.559289 lambda= 3397.948
## phi= 1.51785 sv2= 0.0002243509 df= 3.0804 lambda= 6765.517
## phi= 1.558386 sv2= 0.0001416363 df= 2.735216 lambda= 11002.73
fit_notadv <- bshazard(Surv(time = survmonth, event = event) ~ 1,
data = htndat[htndat$adv_HIV == 0, ])
## Iterations: relative error in phi-hat = 1e-04
## phi= 1.181919 sv2= 0.01696105 df= 8.003624 lambda= 69.68426
## phi= 1.202664 sv2= 0.003641724 df= 5.290794 lambda= 330.2457
## phi= 1.214462 sv2= 0.001546525 df= 3.904228 lambda= 785.2846
## phi= 1.232795 sv2= 0.0007790577 df= 3.335568 lambda= 1582.419
## phi= 1.249134 sv2= 0.0003846332 df= 2.955468 lambda= 3247.598
## phi= 1.262423 sv2= 0.0001732224 df= 2.632501 lambda= 7287.877
## phi= 1.271822 sv2= 7.016679e-05 df= 2.358936 lambda= 18125.69
## Common y-limits so the curves share the same scale
ylim_vals <- range(c(fit_adv$hazard, fit_notadv$hazard))
plot(fit_adv$time, fit_adv$hazard, type = "l", lwd = 2, col = "red",
xlab = "Time", ylab = "Hazard",
main = "Smoothed Hazard by HIV Disease Stage",
ylim = ylim_vals)
lines(fit_notadv$time, fit_notadv$hazard, lwd = 2, col = "blue")
legend("topright",
legend = c("Advanced HIV", "Not advanced"),
col = c("red", "blue"),
lwd = 2)
loghazard_adv = log(fit_adv$hazard)
loghazard_notadv = log(fit_notadv$hazard)
## Common y-limits so the curves share the same scale
ylim_vals <- range(c(loghazard_adv, loghazard_notadv))
## Plot the hazard functions
plot(fit_adv$time, loghazard_adv, type = "l", lwd = 2, col = "red",
xlab = "Time", ylab = "log Hazard",
main = "Smoothed log Hazard by HIV Disease Stage",
ylim = ylim_vals)
lines(fit_notadv$time, loghazard_notadv, lwd = 2, col = "blue")
legend("topright",
legend = c("Advanced HIV", "Not advanced"),
col = c("red", "blue"),
lwd = 2)
km_adv <- survfit(surv_obj ~ factor(adv_HIV), data = htndat)
# Plot the stratified survival function by HIV status
ggsurvplot(
km_adv,
data = htndat,
risk.table = TRUE,
pval = TRUE,
legend.labs= c("Not advanced","Advanced"),
xlab = "Time (months)",
ylab = "Survival probability",
title = "Survival by advanced HIV status",
conf.int = TRUE,
ggtheme = theme_minimal()
)
# Log‐rank test for HIV strata
lr_adv <- survdiff(surv_obj ~ factor(adv_HIV), data = htndat)
print(lr_adv)
## Call:
## survdiff(formula = surv_obj ~ factor(adv_HIV), data = htndat)
##
## n=3038, 1960 observations deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## factor(adv_HIV)=0 1130 99 159 22.5 37.6
## factor(adv_HIV)=1 1908 299 239 14.9 37.6
##
## Chisq= 37.6 on 1 degrees of freedom, p= 9e-10
univ_cox <- coxph(Surv(survmonth, event) ~ adv_HIV,
data = htndat,
ties = "efron")
# Get summary of the first regression model that has only adv_HIV as covariate
summary(univ_cox)
## Call:
## coxph(formula = Surv(survmonth, event) ~ adv_HIV, data = htndat,
## ties = "efron")
##
## n= 3038, number of events= 398
## (1960 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## adv_HIV 0.697 2.008 0.116 6.01 1.86e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## adv_HIV 2.008 0.4981 1.6 2.52
##
## Concordance= 0.584 (se = 0.011 )
## Likelihood ratio test= 40.02 on 1 df, p=3e-10
## Wald test = 36.12 on 1 df, p=2e-09
## Score (logrank) test = 37.6 on 1 df, p=9e-10
# return a nice table of summary Cox regression results using
gtsummary::tbl_regression(univ_cox, exponentiate = TRUE)
Characteristic | HR1 | 95% CI1 | p-value |
---|---|---|---|
adv_HIV | 2.01 | 1.60, 2.52 | <0.001 |
1 HR = Hazard Ratio, CI = Confidence Interval |
# time-varying hazard ratio
# Fit Cox model with time-varying effect of adv_HIV
tv_cox <- coxph(surv_obj ~ adv_HIV + tt(adv_HIV),
data = htndat,
tt = function(x, t, ...) x * t)
# View model summary
summary(tv_cox)
## Call:
## coxph(formula = surv_obj ~ adv_HIV + tt(adv_HIV), data = htndat,
## tt = function(x, t, ...) x * t)
##
## n= 3038, number of events= 398
## (1960 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## adv_HIV 0.972570 2.644733 0.188536 5.159 2.49e-07 ***
## tt(adv_HIV) -0.013329 0.986759 0.006883 -1.937 0.0528 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## adv_HIV 2.6447 0.3781 1.8277 3.827
## tt(adv_HIV) 0.9868 1.0134 0.9735 1.000
##
## Concordance= 0.584 (se = 0.011 )
## Likelihood ratio test= 43.72 on 2 df, p=3e-10
## Wald test = 38.28 on 2 df, p=5e-09
## Score (logrank) test = 40.45 on 2 df, p=2e-09
# Center SBP at 120 for interpretability and save data in dat
dat <- htndat %>%
dplyr::mutate(sbp_c120 = SBP - 120)
# Fit Cox model with SBP only
model1 <- coxph(
Surv(survmonth, event) ~ sbp_c120,
data = dat,
ties = "efron"
)
summary(model1)
## Call:
## coxph(formula = Surv(survmonth, event) ~ sbp_c120, data = dat,
## ties = "efron")
##
## n= 4998, number of events= 749
##
## coef exp(coef) se(coef) z Pr(>|z|)
## sbp_c120 -0.02246 0.97779 0.00286 -7.853 4.05e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sbp_c120 0.9778 1.023 0.9723 0.9833
##
## Concordance= 0.601 (se = 0.012 )
## Likelihood ratio test= 67.31 on 1 df, p=2e-16
## Wald test = 61.68 on 1 df, p=4e-15
## Score (logrank) test = 59.98 on 1 df, p=1e-14
# set knot points
knots <- quantile(dat$sbp_c120, probs = c(0.20, 0.50, 0.80))
# fit Cox model with a 3-df B-spline for sbp_c120
model_spline <- coxph(
Surv(survmonth, event) ~ bs(sbp_c120, knots = knots, degree = 3),
data = dat,
ties = "efron"
)
summary(model_spline)
## Call:
## coxph(formula = Surv(survmonth, event) ~ bs(sbp_c120, knots = knots,
## degree = 3), data = dat, ties = "efron")
##
## n= 4998, number of events= 749
##
## coef exp(coef) se(coef) z
## bs(sbp_c120, knots = knots, degree = 3)1 -2.037785 0.130317 1.264068 -1.612
## bs(sbp_c120, knots = knots, degree = 3)2 -1.525040 0.217612 0.806992 -1.890
## bs(sbp_c120, knots = knots, degree = 3)3 -2.890363 0.055556 0.905937 -3.190
## bs(sbp_c120, knots = knots, degree = 3)4 -2.315914 0.098676 0.947017 -2.445
## bs(sbp_c120, knots = knots, degree = 3)5 -2.554967 0.077695 2.070859 -1.234
## bs(sbp_c120, knots = knots, degree = 3)6 -7.040151 0.000876 4.756966 -1.480
## Pr(>|z|)
## bs(sbp_c120, knots = knots, degree = 3)1 0.10694
## bs(sbp_c120, knots = knots, degree = 3)2 0.05879 .
## bs(sbp_c120, knots = knots, degree = 3)3 0.00142 **
## bs(sbp_c120, knots = knots, degree = 3)4 0.01447 *
## bs(sbp_c120, knots = knots, degree = 3)5 0.21729
## bs(sbp_c120, knots = knots, degree = 3)6 0.13888
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95
## bs(sbp_c120, knots = knots, degree = 3)1 0.130317 7.674 1.094e-02
## bs(sbp_c120, knots = knots, degree = 3)2 0.217612 4.595 4.475e-02
## bs(sbp_c120, knots = knots, degree = 3)3 0.055556 18.000 9.410e-03
## bs(sbp_c120, knots = knots, degree = 3)4 0.098676 10.134 1.542e-02
## bs(sbp_c120, knots = knots, degree = 3)5 0.077695 12.871 1.342e-03
## bs(sbp_c120, knots = knots, degree = 3)6 0.000876 1141.560 7.823e-08
## upper .95
## bs(sbp_c120, knots = knots, degree = 3)1 1.5523
## bs(sbp_c120, knots = knots, degree = 3)2 1.0583
## bs(sbp_c120, knots = knots, degree = 3)3 0.3280
## bs(sbp_c120, knots = knots, degree = 3)4 0.6314
## bs(sbp_c120, knots = knots, degree = 3)5 4.4989
## bs(sbp_c120, knots = knots, degree = 3)6 9.8093
##
## Concordance= 0.604 (se = 0.011 )
## Likelihood ratio test= 96.33 on 6 df, p=<2e-16
## Wald test = 103.5 on 6 df, p=<2e-16
## Score (logrank) test = 112.3 on 6 df, p=<2e-16
termplot(
model_spline,
terms = 1, # first term is the bs(sbp_c150, ...) spline
se = TRUE,
rug = TRUE,
col.term = "blue",
col.se = "red",
main = "Partial Effect of SBP (B-spline) on Log Hazard",
xlab = "SBP centered at 120 mmHg",
ylab = "Partial log hazard",
ylim=c(-6,5)
)
# fit Cox model with a 3-df natural spline for sbp_c120 plus adv_HIV as a covariate
model_spline <- coxph(
Surv(survmonth, event) ~ ns(sbp_c120, df=3),
data = dat,
ties = "efron"
)
summary(model_spline)
## Call:
## coxph(formula = Surv(survmonth, event) ~ ns(sbp_c120, df = 3),
## data = dat, ties = "efron")
##
## n= 4998, number of events= 749
##
## coef exp(coef) se(coef) z Pr(>|z|)
## ns(sbp_c120, df = 3)1 -1.92262 0.14622 0.23230 -8.276 < 2e-16 ***
## ns(sbp_c120, df = 3)2 -4.09300 0.01669 0.80929 -5.057 4.25e-07 ***
## ns(sbp_c120, df = 3)3 -1.45872 0.23253 0.81783 -1.784 0.0745 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## ns(sbp_c120, df = 3)1 0.14622 6.839 0.092743 0.23054
## ns(sbp_c120, df = 3)2 0.01669 59.919 0.003416 0.08153
## ns(sbp_c120, df = 3)3 0.23253 4.300 0.046811 1.15512
##
## Concordance= 0.601 (se = 0.012 )
## Likelihood ratio test= 83.82 on 3 df, p=<2e-16
## Wald test = 96.41 on 3 df, p=<2e-16
## Score (logrank) test = 103.3 on 3 df, p=<2e-16
termplot(
model_spline,
terms = 1,
se = TRUE,
rug = TRUE,
col.term = "blue",
col.se = "red",
main = "Partial Effect of SBP (natural cubic spline) on Log Hazard",
xlab = "SBP centered at 120 mmHg",
ylab = "Partial log hazard",
ylim=c(-6,5)
)
# Fit Cox model with main effects of SBP and advanced HIV in a single model
model3 <- coxph(
Surv(survmonth, event) ~ sbp_c120 + as.factor(adv_HIV),
data = dat,
ties = "efron"
)
summary(model3)
## Call:
## coxph(formula = Surv(survmonth, event) ~ sbp_c120 + as.factor(adv_HIV),
## data = dat, ties = "efron")
##
## n= 3038, number of events= 398
## (1960 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## sbp_c120 -0.016011 0.984117 0.003723 -4.301 1.70e-05 ***
## as.factor(adv_HIV)1 0.686845 1.987435 0.116028 5.920 3.23e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sbp_c120 0.9841 1.0161 0.977 0.9913
## as.factor(adv_HIV)1 1.9874 0.5032 1.583 2.4949
##
## Concordance= 0.631 (se = 0.015 )
## Likelihood ratio test= 60.01 on 2 df, p=9e-14
## Wald test = 54.9 on 2 df, p=1e-12
## Score (logrank) test = 56.14 on 2 df, p=6e-13
Evidence of significant effect of both SBP and advanced HIV on the time to death
# Fit Cox model with SBP × advanced HIV interaction
model4 <- coxph(
Surv(survmonth, event) ~ sbp_c120 * as.factor(adv_HIV),
data = dat,
ties = "efron"
)
summary(model4)
## Call:
## coxph(formula = Surv(survmonth, event) ~ sbp_c120 * as.factor(adv_HIV),
## data = dat, ties = "efron")
##
## n= 3038, number of events= 398
## (1960 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## sbp_c120 -0.010847 0.989212 0.007533 -1.440 0.150
## as.factor(adv_HIV)1 0.609250 1.839052 0.151291 4.027 5.65e-05 ***
## sbp_c120:as.factor(adv_HIV)1 -0.006764 0.993259 0.008665 -0.781 0.435
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sbp_c120 0.9892 1.0109 0.9747 1.004
## as.factor(adv_HIV)1 1.8391 0.5438 1.3671 2.474
## sbp_c120:as.factor(adv_HIV)1 0.9933 1.0068 0.9765 1.010
##
## Concordance= 0.63 (se = 0.015 )
## Likelihood ratio test= 60.61 on 3 df, p=4e-13
## Wald test = 57.16 on 3 df, p=2e-12
## Score (logrank) test = 59.59 on 3 df, p=7e-13
No evidence of a dependence of the effect of advanced HIV on the average SBP on time to death
# Fit Cox model with several covariates
model5 <- coxph(
Surv(survmonth, event) ~ sbp_c120 + as.factor(adv_HIV)
+ as.factor(male.gender) + age,
data = dat,
ties = "efron"
)
summary(model5)
## Call:
## coxph(formula = Surv(survmonth, event) ~ sbp_c120 + as.factor(adv_HIV) +
## as.factor(male.gender) + age, data = dat, ties = "efron")
##
## n= 3038, number of events= 398
## (1960 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## sbp_c120 -0.019796 0.980399 0.003810 -5.196 2.04e-07 ***
## as.factor(adv_HIV)1 0.587573 1.799616 0.118416 4.962 6.98e-07 ***
## as.factor(male.gender)1 0.622844 1.864222 0.108476 5.742 9.37e-09 ***
## age 0.004928 1.004940 0.004644 1.061 0.289
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sbp_c120 0.9804 1.0200 0.9731 0.9877
## as.factor(adv_HIV)1 1.7996 0.5557 1.4269 2.2697
## as.factor(male.gender)1 1.8642 0.5364 1.5072 2.3059
## age 1.0049 0.9951 0.9958 1.0141
##
## Concordance= 0.654 (se = 0.015 )
## Likelihood ratio test= 97.02 on 4 df, p=<2e-16
## Wald test = 93.94 on 4 df, p=<2e-16
## Score (logrank) test = 95.94 on 4 df, p=<2e-16
# Fit Cox model with several covariates and splines
model6 <- coxph(
Surv(survmonth, event) ~ ns(sbp_c120, df=3) + ns(age, df=3)
+ as.factor(male.gender)
+ adv_HIV + tt(adv_HIV),
tt = function(x, t, ...) x * t,
data = dat,
ties = "efron"
)
model6 <- coxph(
Surv(survmonth, event) ~ ns(sbp_c120, df=3) + ns(age, df=3)
+ as.factor(male.gender)
+ as.factor(adv_HIV),
data = dat,
ties = "efron"
)
summary(model6)
## Call:
## coxph(formula = Surv(survmonth, event) ~ ns(sbp_c120, df = 3) +
## ns(age, df = 3) + as.factor(male.gender) + as.factor(adv_HIV),
## data = dat, ties = "efron")
##
## n= 3038, number of events= 398
## (1960 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## ns(sbp_c120, df = 3)1 -1.921020 0.146457 0.310251 -6.192 5.95e-10 ***
## ns(sbp_c120, df = 3)2 -4.609319 0.009959 0.988404 -4.663 3.11e-06 ***
## ns(sbp_c120, df = 3)3 -1.296022 0.273618 0.931644 -1.391 0.164
## ns(age, df = 3)1 0.368627 1.445748 0.251386 1.466 0.143
## ns(age, df = 3)2 -0.270376 0.763092 0.657682 -0.411 0.681
## ns(age, df = 3)3 -0.293124 0.745930 0.488120 -0.601 0.548
## as.factor(male.gender)1 0.630648 1.878828 0.109065 5.782 7.37e-09 ***
## as.factor(adv_HIV)1 0.569175 1.766808 0.118548 4.801 1.58e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## ns(sbp_c120, df = 3)1 0.146457 6.8279 0.079731 0.26903
## ns(sbp_c120, df = 3)2 0.009959 100.4158 0.001435 0.06911
## ns(sbp_c120, df = 3)3 0.273618 3.6547 0.044069 1.69887
## ns(age, df = 3)1 1.445748 0.6917 0.883309 2.36632
## ns(age, df = 3)2 0.763092 1.3105 0.210260 2.76947
## ns(age, df = 3)3 0.745930 1.3406 0.286556 1.94172
## as.factor(male.gender)1 1.878828 0.5322 1.517227 2.32661
## as.factor(adv_HIV)1 1.766808 0.5660 1.400495 2.22893
##
## Concordance= 0.662 (se = 0.014 )
## Likelihood ratio test= 115.2 on 8 df, p=<2e-16
## Wald test = 124.3 on 8 df, p=<2e-16
## Score (logrank) test = 131.7 on 8 df, p=<2e-16
termplot(model=model6, terms=c(1,2,3,4),
se = TRUE,
rug = TRUE,
col.term = "blue",
col.se = "red"
)
# Martingale residuals using model 1
htndat$mart_resid <- residuals(model1, type = "martingale")
plot(
htndat$SBP-120,
htndat$mart_resid,
xlab = "Systolic Blood Pressure (mmHg)",
ylab = "Martingale residuals",
main = "Martingale Residuals vs. SBP",
col="gray"
)
lines(
lowess(htndat$SBP-120, htndat$mart_resid, f = 0.5),
col = "red",
lwd = 3
)
## Using a test of hypothesis (H0: proportional hazards holds)
cox.zph(univ_cox)
## chisq df p
## adv_HIV 3.65 1 0.056
## GLOBAL 3.65 1 0.056
## Using Schoenfeld residuals
plot(cox.zph(univ_cox), lwd=3)
abline(h=0,col="red", lwd=2) # add red line at 0
# Fully parametric fits via survreg
## Weibull AFT model
wei_aft <- survreg(surv_obj ~ as.factor(adv_HIV),
data = htndat,
dist = "weibull")
summary(wei_aft)
##
## Call:
## survreg(formula = surv_obj ~ as.factor(adv_HIV), data = htndat,
## dist = "weibull")
## Value Std. Error z p
## (Intercept) 5.9704 0.1479 40.36 <2e-16
## as.factor(adv_HIV)1 -0.7601 0.1300 -5.85 5e-09
## Log(scale) 0.0869 0.0421 2.06 0.039
##
## Scale= 1.09
##
## Weibull distribution
## Loglik(model)= -2485 Loglik(intercept only)= -2505
## Chisq= 40 on 1 degrees of freedom, p= 2.5e-10
## Number of Newton-Raphson Iterations: 10
## n=3038 (1960 observations deleted due to missingness)
# Get the PH model parameters from the AFT model fitted earlier
# Extract AFT scale
sigma_aft <- wei_aft$scale
# Convert to PH shape
shape_ph <- 1 / sigma_aft
# AFT coefficient for adv_HIV
beta_aft <- wei_aft$coefficients
# Convert to PH log–hazard ratio
beta_ph <- - beta_aft / sigma_aft
hr_ph <- exp(beta_ph)
# Display and note similarity to the Cox model fitted earlier
shape_ph
## [1] 0.9167777
beta_ph
## (Intercept) as.factor(adv_HIV)1
## -5.4735319 0.6968243
hr_ph
## (Intercept) as.factor(adv_HIV)1
## 0.004196385 2.007367801