# Explore the data structure using str()
str(htndat)
## 'data.frame': 4998 obs. of 17 variables:
## $ DBP : int 60 75 60 60 60 60 59 60 70 58 ...
## $ SBP : int 90 110 80 90 100 120 100 100 120 110 ...
## $ BMI : num NA 27.3 17.7 19.9 21.3 ...
## $ age : num 28 26.5 43 50.1 30.6 ...
## $ married : int 0 1 0 1 0 1 0 0 1 1 ...
## $ male.gender : int 0 0 0 1 1 0 0 1 0 0 ...
## $ hgb_centered : num NA -3.9 -3.2 NA -0.4 ...
## $ adv_HIV : int NA 0 NA NA NA NA NA NA NA NA ...
## $ survtime : int 338 439 752 526 215 272 1669 388 14 419 ...
## $ event : int 1 1 1 1 1 1 1 1 1 1 ...
## $ arv_naive : int 1 1 1 1 1 1 1 0 1 1 ...
## $ urban.clinic : int 0 1 0 1 0 0 1 0 0 0 ...
## $ log_creat_centered: num NA 5.42e-02 -3.60e-01 NA -1.00e-07 ...
## $ IPW_weight : num 0.924 1.164 0.721 0.829 0.856 ...
## $ SBP_ge120 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ survmonth : num 12 15 26 18 8 10 56 13 1 14 ...
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
# Explore % of missing values by variable
miss <- apply(htndat, 2, function(x) sum(is.na(x))/nrow(htndat)*100)
# Use kable() function from kableExtra package to get a clean table
miss <- miss[miss != 0] %>%
kableExtra::kable(booktabs = TRUE,
digits = 2,
col.names = "",
caption="Missing data percentage among variables in the dataset") %>%
kableExtra::kable_styling(latex_options = c("striped", "HOLD_position")
)
miss
BMI | 13.39 |
married | 3.26 |
hgb_centered | 27.87 |
adv_HIV | 39.22 |
log_creat_centered | 30.75 |
## ----summaries and tables for HTN data -----------------------------------------------------
# Create a summary table
gtsummary::tbl_summary(data = htndat,
by = adv_HIV,
include = c(SBP, BMI, age, married,
male.gender, hgb_centered, event,
arv_naive, urban.clinic,
log_creat_centered,
SBP_ge120, adv_HIV),
label = list(adv_HIV = "Advanced HIV, 1 = Yes",
event = "Event (Death)"),
missing = "ifany",
missing_text = "Missing %",
missing_stat = "{p_miss}") %>%
gtsummary::bold_labels() %>%
gtsummary::add_overall() %>%
gtsummary::add_p()
Characteristic | Overall N = 3,0381 |
0 N = 1,1301 |
1 N = 1,9081 |
p-value2 |
---|---|---|---|---|
SBP | 110 (100, 120) | 110 (100, 120) | 110 (100, 120) | 0.003 |
BMI | 21.0 (18.9, 23.4) | 21.9 (19.7, 24.7) | 20.5 (18.6, 22.8) | <0.001 |
    Missing % | 5.4 | 5.5 | 5.3 | |
age | 34 (27, 42) | 31 (26, 38) | 35 (29, 44) | <0.001 |
married | 1,773 (59%) | 693 (62%) | 1,080 (58%) | 0.010 |
    Missing % | 1.9 | 1.9 | 1.9 | |
male.gender | 743 (24%) | 206 (18%) | 537 (28%) | <0.001 |
hgb_centered | 0.20 (-1.60, 1.70) | 0.60 (-1.10, 1.80) | -0.20 (-2.00, 1.50) | <0.001 |
    Missing % | 12 | 12 | 12 | |
Event (Death) | 398 (13%) | 99 (8.8%) | 299 (16%) | <0.001 |
arv_naive | 2,871 (95%) | 1,057 (94%) | 1,814 (95%) | 0.073 |
urban.clinic | 1,526 (50%) | 573 (51%) | 953 (50%) | 0.7 |
log_creat_centered | -0.09 (-0.28, 0.10) | -0.13 (-0.29, 0.06) | -0.08 (-0.28, 0.12) | 0.002 |
    Missing % | 12 | 13 | 12 | |
SBP_ge120 | 174 (5.7%) | 63 (5.6%) | 111 (5.8%) | 0.8 |
1 Median (Q1, Q3); n (%) | ||||
2 Wilcoxon rank sum test; Pearson’s Chi-squared test |
## ---view table of HTN data
# Inspect key variables
str(htndat[c("survmonth", "event")])
## 'data.frame': 4998 obs. of 2 variables:
## $ survmonth: num 12 15 26 18 8 10 56 13 1 14 ...
## $ event : int 1 1 1 1 1 1 1 1 1 1 ...
# 'survmonth': num ...
# 'event' : int ...
## ----Create survival object, fit KM curve
# Create the survival object
surv_obj <- with(htndat, Surv(time = survmonth, event = event))
# Explanation:
# - 'time' = months from baseline to death or censoring
# - 'event' = 1 if death occurred, 0 if censored
# Fit overall KM curve
km_overall <- survfit(surv_obj ~ 1, data = htndat)
print(km_overall)
## Call: survfit(formula = surv_obj ~ 1, data = htndat)
##
## n events median 0.95LCL 0.95UCL
## [1,] 4998 749 NA NA NA
# basic plot
plot(km_overall, xlab="Months", ylab="Survival proportion")
plot(km_overall, xlab="Months", ylab="Survival proportion",
ylim=c(.6,1),
conf.int=FALSE)
# fancy plot
p = ggsurvplot(
km_overall,
data = htndat,
risk.table = TRUE,
conf.int = TRUE,
xlab = "Time (months)",
ylab = "Survival probability",
ggtheme = theme_minimal()
)
print(p)