library(WR)
library(survival)
library(tidyverse) # For data wrangling and visualization
library(knitr)
library(glmnet)
library(patchwork)
library(rmt)
Regularized win ratio regression for variable selection and risk prediction
Simulation studies and a real-world application
\[ \newcommand{\wh}{\widehat} \newcommand{\wt}{\widetilde} \def\bs{\boldsymbol} \newcommand{\indep}{\perp \!\!\! \perp} \def\T{{ \mathrm{\scriptscriptstyle T} }} \def\pr{{\rm pr}} \def\d{{\rm d}} \def\W{{\mathcal W}} \]
This document provides a step-by-step guide to reproducing the numerical results in the simulation studies (Section 3) and the real data analysis (Section 4) of the manuscript.
- Note: the real data used in the analysis are not publicly available.
Basic functions
Function definitions
# Get minimum of a vector, return Inf if vector is null or all elements are NA
# Handles edge cases where the input vector is null or entirely NA.
min_na_inf <- function(x){
  if(all(is.na(x)) | is.null(x)){
    # Return Inf if input is null or all elements are NA
    return(Inf)
  } else {
    # Otherwise, return the minimum value, ignoring NA
    return(min(x, na.rm = TRUE))
  }
}
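A quick check of the edge cases (illustrative; not part of the original script):

min_na_inf(NULL)         # Inf
min_na_inf(c(NA, NA))    # Inf
min_na_inf(numeric(0))   # Inf
min_na_inf(c(3, NA, 1))  # 1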
# Extract first time where status == k by time t
# Outputs Inf if no such time exists
# Arguments:
# - time: Vector of time points
# - status: Vector of statuses corresponding to time points
# - k: The specific status of interest
# - t: Upper limit of time (default Inf)
extact_time_with_status <- function(time, status, k, t = Inf){
  # Filter times where status equals k and time is within the limit, then find the minimum
  time[status == k & time <= t] |> min_na_inf()
}
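For example (illustrative), for a subject with a nonfatal event at 0.5 and death at 1.2:

extact_time_with_status(time = c(0.5, 1.2), status = c(2, 1), k = 1)         # 1.2 (death time)
extact_time_with_status(time = c(0.5, 1.2), status = c(2, 1), k = 1, t = 1)  # Inf (no death by t = 1)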
# Transform outcomes data by standard win function (Pocock's setting)
# Groups data by subject and converts long format to wide format
# Then computes pairwise wins and losses
poc_win <- function(df_y, n){
  df_y_wide <- df_y |>
    group_by(id) |>
    reframe(
      results = long_to_wide_death_nf(time, status)
    ) |>
    mutate(
      names = rep(c("death_time", "nonfatal_time", "fu_time"), n)
    ) |>
    pivot_wider(
      names_from = names,
      values_from = results
    )
  # Generate all pairwise combinations
  pairwise_tibble <- crossing(i = df_y_wide, j = df_y_wide) %>%
    filter(i |> row_number() < j |> row_number()) |>
    unnest(c(i, j), names_sep = "_")
  # Compute pairwise wins and losses
  pw_win <- pairwise_tibble |>
    mutate(
      win_death = case_when(
        j_death_time < pmin(i_death_time, i_fu_time) ~ 1,
        i_death_time < pmin(j_death_time, j_fu_time) ~ -1,
        TRUE ~ 0
      ),
      win_nonfatal = if_else(win_death == 0, case_when(
        j_nonfatal_time < pmin(i_nonfatal_time, i_fu_time) ~ 1,
        i_nonfatal_time < pmin(j_nonfatal_time, j_fu_time) ~ -1,
        TRUE ~ 0
      ), 0),
      win = win_death + win_nonfatal,
      component = case_when(
        win_death != 0 ~ "death",
        win_nonfatal != 0 ~ "nonfatal",
        TRUE ~ "tie"
      )
    ) |>
    select(
      id_i = i_id,
      id_j = j_id,
      win,
      component
    ) |>
    filter(win != 0) # Remove pairs with win == 0
  # Remove pairs with win == 0 in pairwise tibble as well
  # by left-joining with pw_win
  pairwise_tibble <- pw_win |> select(id_i, id_j) |>
    left_join(pairwise_tibble, by = c("id_i" = "i_id", "id_j" = "j_id"))
  list(pairwise_tibble = pairwise_tibble, pw_win = pw_win)
}
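Once long_to_wide_death_nf() below is also loaded, a two-subject toy dataset illustrates the win rule; the expected result is reasoned from the code above, not taken from the original document:

# Subject 1 dies at t = 1; subject 2 has a nonfatal event at t = 0.5 and is censored at t = 2
df_y_toy <- tibble(
  id     = c(1, 2, 2),
  time   = c(1, 0.5, 2),
  status = c(1, 2, 0)
)
poc_win(df_y_toy, n = 2)$pw_win
# Subject 2 outlives subject 1's death, so the pair is decided on the
# death component: win = -1 (a win for subject j = 2)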
# Reorganize long outcomes (death, nonfatal, censor) to wide format
# Returns a vector with death time, nonfatal event time, and follow-up time
long_to_wide_death_nf <- function(time, status, deathcode = 1, nfcode = 2, t = Inf){

  fu_time <- min(max(time), t)
  death_time <- extact_time_with_status(time, status, deathcode, fu_time)
  nonfatal_time <- extact_time_with_status(time, status, nfcode, fu_time)

  obj <- list(
    death_time = death_time,
    nonfatal_time = nonfatal_time,
    fu_time = fu_time
  )
  # return(obj)
  return(c(death_time, nonfatal_time, fu_time))
}
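For a subject with a nonfatal event at 0.5 and death at 1.2 (illustrative):

long_to_wide_death_nf(time = c(0.5, 1.2), status = c(2, 1))
#> 1.2 0.5 1.2  (death_time, nonfatal_time, fu_time)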
# Transform data from subject-wise long format to pairwise win-loss format
# Includes information on the deciding component (death or nonfatal events)
# Arguments:
# - df: Data frame with (id, time, status) and potentially other covariates
pair_win_loss_transform <- function(df) {
  # Separate outcomes with baseline covariates
  # Covariates
  Zn <- df |>
    group_by(id) |>
    slice(1) |>
    select(-c(time, status))
  # Number of covariates
  p <- ncol(Zn) - 1

  # Flatten outcome data and self-pair --------------------------------------
  # Specific to win function wfun
  # df_y -> pairwise_tibble, pw_win
  df_y <- df |>
    select(id, time, status)
  n <- nrow(Zn)

  poc_win_obj <- poc_win(df_y, n)

  # Extract pairwise data
  pairwise_tibble <- poc_win_obj$pairwise_tibble
  pw_win <- poc_win_obj$pw_win
  # Self-pair covariates
  pw_Z <- crossing(i = Zn, j = Zn) %>%
    filter(i |> row_number() < j |> row_number()) |>
    unnest(c(i, j), names_sep = "_") |>
    # Right join with pw_win to remove pairs with win == 0
    right_join(pw_win |> select(id_i, id_j),
               by = c("i_id" = "id_i", "j_id" = "id_j"))
  # Compute difference in covariates
  pw_Zd <- pw_Z |>
    select(id_i = i_id, id_j = j_id) |>
    bind_cols(
      # Z_i - Z_j
      pw_Z |> select(starts_with("i_")) |> rename_with(~ str_remove(., "i_")) |> select(-id) -
        pw_Z |> select(starts_with("j_")) |> rename_with(~ str_remove(., "j_")) |> select(-id)
    )
  # Merge Zd with pw_win by id_i and id_j
  pw_df <- pw_win |>
    left_join(pw_Zd, by = c("id_i", "id_j")) |>
    mutate(
      win = win |> as.factor(),
      component = component |> as.factor()
    )
  pw_df
}
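As an illustration (not in the original script), the gbc data shipped with WR are already in (id, time, status, covariates) format and can be transformed directly:

data("gbc")
pw_df <- pair_win_loss_transform(gbc)
head(pw_df)  # id_i, id_j, win (-1/1), component, and covariate differences Z_i - Z_j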
# Split data into training and test sets
# Arguments:
# - df: Data frame with (id, time, status) and potentially other covariates
# - prop: Proportion of data to be used for training
# - strata: Whether to stratify the split based on event types
# Returns a list with training and test data frames
wr_split <- function(df, prop = 0.8, strata = TRUE){
  if (strata){
    out_id_train <- df |>
      group_by(id) |>
      summarize(
        out = case_when(
          any(status == 1) & any(status == 2) ~ 1,
          any(status == 1) ~ 2,
          any(status == 2) ~ 3,
          TRUE ~ 4
        )
      ) |>
      group_by(out) |>
      mutate(
        split = sample(c("train", "test"), n(), replace = TRUE, prob = c(prop, 1 - prop))
      ) |>
      ungroup() |>
      select(-out)
  } else {
    out_id_train <- df |>
      distinct(id) |>
      mutate(split = sample(c("train", "test"), n(), replace = TRUE, prob = c(prop, 1 - prop)))
  }
  df_split <- df |>
    inner_join(out_id_train, by = "id")
  list(df_train = df_split |> filter(split == "train") |> select(-split),
       df_test = df_split |> filter(split == "test") |> select(-split))
}
# Main function to fit a penalized win ratio regression with pathwise solution
# Arguments (long format):
# - id, time, status: Subject identifiers, event times, and event statuses
# - Z: Matrix of covariates
# - ...: Additional arguments passed to glmnet (lambda, alpha, etc.)
wrnet <- function(id, time, status, Z, ...){

  df <- tibble(id, time, status, Z)
  # Transform data to pairwise win-loss format
  pw_df <- pair_win_loss_transform(df)

  y <- pw_df$win
  x <- pw_df |> select(-c(id_i, id_j, win, component))

  # Use glmnet for logistic regression
  obj <- glmnet(
    x = x,
    y = y,
    family = "binomial",
    # Constrain intercept = 0
    intercept = FALSE,
    ...
  )
  obj$Zn <- df |> group_by(id) |> slice(1) |> ungroup() |> select(-c(id, time, status)) |> as.matrix()

  class(obj) <- "wrnet"
  obj
}
# Create default lambda sequence for regularization
# Arguments:
# - id, time, status: Subject identifiers, event times, and statuses
# - Z: Covariate matrix
create_default_lambda <- function(id, time, status, Z){
  Z <- as.matrix(Z)
  df <- tibble(id, time, status, Z)
  n <- length(unique(id))
  p <- ncol(Z)
  # Transform data to pairwise win-loss format
  pw_df <- pair_win_loss_transform(df)
  y <- pw_df$win
  x <- pw_df |> select(-c(id_i, id_j, win, component))
  x_scaled <- scale(x)
  N <- nrow(x)

  lambda.max <- max(abs(1 / N * t(x_scaled) %*% (1/N - (y == unique(y)[1]))), na.rm = TRUE)
  lambda_min_ratio <- ifelse(n <= p, 0.01, 0.0001)
  lambda <- lambda.max * lambda_min_ratio^seq(0, 1, length.out = 100)

  return(lambda)
}
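The grid is geometric, decaying from lambda.max down to lambda.max * lambda_min_ratio. A quick illustration with hypothetical numbers:

lambda_max <- 0.5
lambda <- lambda_max * 0.01^seq(0, 1, length.out = 100)
range(lambda)  # 0.005 to 0.500, equally spaced on the log scale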
# Cross-validation for wrnet
# Arguments:
# - id, time, status: Subject identifiers, event times, and statuses
# - Z: Covariate matrix
# - k: Number of folds
# - strata: Whether to stratify folds based on event types
cv_wrnet <- function(id, time, status, Z, k = 10, strata = TRUE, lambda = NULL, ...){

  # Generate default lambda sequence if not provided
  if (is.null(lambda)){
    lambda <- create_default_lambda(id, time, status, Z)
  }
  df_train <- tibble(id, time, status, Z)

  # Generate folds
  if (strata){
    out_id_train_cv <- df_train |>
      group_by(id) |>
      summarize(
        out = case_when(
          any(status == 1) & any(status == 2) ~ 1,
          any(status == 1) ~ 2,
          any(status == 2) ~ 3,
          TRUE ~ 4
        )
      ) |>
      group_by(out) |>
      mutate(
        fold = sample(1:k, n(), replace = TRUE)
      ) |>
      ungroup() |>
      select(-out)
  } else {
    out_id_train_cv <- df_train |>
      distinct(id) |>
      mutate(fold = sample(1:k, n(), replace = TRUE))
  }
  # Transform each fold to pairwise format
  df_train_cv <- df_train |>
    inner_join(out_id_train_cv, by = "id") |>
    select(fold, everything()) |>
    group_by(fold) |>
    nest() |>
    mutate(
      pw_data = map(data, ~ .x |> pair_win_loss_transform())
    ) |>
    select(-data)

  fit_pw_fold <- function(k, lambda, ...){
    df_train_cv_k <- df_train_cv |> filter(fold != k) |> unnest(pw_data) |> ungroup()
    # Fit regularized logistic regression model
    pw_fit <- glmnet(
      x = df_train_cv_k |> select(-(1:5)) |> as.matrix(),
      y = df_train_cv_k$win,
      family = "binomial",
      # constrain intercept = 0
      intercept = FALSE,
      lambda = lambda,
      ...
    )
    Beta <- as.matrix(coef(pw_fit))[-1, ]

    df_val_cv_k <- df_train_cv |> filter(fold == k) |> unnest(pw_data) |> ungroup()

    pw_win_test_score <- df_val_cv_k |>
      select((1:5)) |>
      bind_cols(
        as_tibble(df_val_cv_k |> select(-(1:5)) |> as.matrix() %*% Beta)
      )
    # Compute concordance index
    cind_lambda <- pw_win_test_score |>
      mutate(
        win = case_when(
          win == "1" ~ 1,
          win == "-1" ~ -1
        ),
        across(-c(fold, id_i, id_j, win, component), \(x) if_else(x == 0, 0.5, sign(x) * win == 1))
      ) |>
      summarize(
        across(-c(fold, id_i, id_j, win, component), mean)
      ) |>
      pivot_longer(
        everything(),
        names_to = "lambda_name",
        values_to = "concordance"
      ) |>
      mutate(
        lambda = pw_fit$lambda
      )
    cind_lambda
  }
  fit_pw_fold_param <- function(k) fit_pw_fold(k, lambda, ...)

  cv_results <- tibble(
    fold = 1:k,
    cind_lambda = map(fold, ~ fit_pw_fold_param(.x))
  )
  cv_results_expanded <- cv_results |>
    unnest(cind_lambda) |>
    group_by(lambda) |>
    summarize(
      concordance = mean(concordance)
    )
  cv_results_expanded
}
# Test the fitted wrnet model on test data
# Arguments:
# - fit: Fitted wrnet object
# - df_test: Test dataset with (id, time, status, covariates)
test_wrnet <- function(fit, df_test){

  # Extract beta
  beta <- as.matrix(fit$beta)

  Zn_test <- df_test |> group_by(id) |> slice(1) |> select(-c(time, status)) |> ungroup()
  score_test <- tibble(
    id = Zn_test$id,
    win_score = Zn_test |> select(-id) |> as.matrix() %*% beta |> as.vector(),
    risk_score = - win_score
  )
  pw_score_test <- crossing(i = score_test, j = score_test) %>%
    filter(i |> row_number() < j |> row_number()) |>
    unnest(c(i, j), names_sep = "_") |>
    select(
      id_i = i_id,
      id_j = j_id,
      win_score_i = i_win_score,
      win_score_j = j_win_score
    )
  df_y_test <- df_test |>
    select(id, time, status)
  n_test <- nrow(Zn_test)

  poc_win_obj <- poc_win(df_y_test, n_test)

  # Extract pairwise data
  pw_win_test <- poc_win_obj$pw_win

  # Merge win-loss outcomes with risk scores
  pw_win_score_test <- pw_win_test |>
    left_join(pw_score_test, by = c("id_i" = "id_i", "id_j" = "id_j"))
  # Overall and component-wise concordance
  concordance <- pw_win_score_test |>
    bind_rows(
      pw_win_score_test |> mutate(component = "overall")
    ) |>
    mutate(
      score_diff = win_score_i - win_score_j,
      concord = if_else(score_diff == 0, 0.5, sign(score_diff) == win)
    ) |>
    group_by(component) |>
    summarize(
      concordance = mean(concord, na.rm = TRUE)
    )
  list(concordance = concordance, score_test = score_test, pw_win_score_test = pw_win_score_test)
}
# Computes variable importance based on standardized coefficients
# Arguments:
# - fit: Fitted wrnet object
vi_wrnet <- function(fit){

  beta <- fit$beta
  Zn <- fit$Zn

  sds <- apply(Zn, 2, sd)
  importance <- abs(beta * sds)

  obj <- tibble(
    vars = rownames(beta),
    importance = as.vector(importance)
  ) |>
    arrange(
      desc(importance)
    ) |>
    filter(
      importance > 0
    )
  class(obj) <- "vi_wrnet"
  obj
}
# Plot top k variables based on variable importance
# Arguments:
# - x: Variable importance object (vi_wrnet)
# - k: Number of top variables to display (default is top 10 or all if fewer than 20)
vip.vi_wrnet <- function(x, k = NULL){

  vip_tbl <- tibble(
    vars = x$vars,
    importance = x$importance
  )
  m <- nrow(vip_tbl)

  if (is.null(k)){
    k <- ifelse(m <= 20, m, 10)
  } else {
    k <- min(k, m)
  }
  vip_tbl |>
    head(k) |>
    ggplot(
      aes(y = reorder(vars, importance), x = importance)
    ) +
    geom_col() +
    labs(
      x = "Importance",
      y = NULL
    ) +
    theme_minimal()
}
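vip.vi_wrnet() is an S3 method, so calls of the form vi_wrnet(fit) |> vip() used below assume a vip() generic is in scope. If the rest of the code base does not supply one, a minimal generic suffices:

vip <- function(x, ...) UseMethod("vip")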
logit <- function(x) {

  x <- ifelse(x == 1,
              1 - 1e-12,
              x)
  x <- ifelse(x == 0,
              1e-12,
              x)
  # return results
  log(x / (1 - x))
}
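A quick check of the boundary clamping (illustrative):

logit(c(0, 0.5, 1))
#> approximately -27.6  0.0  27.6, since log(1e-12) = -27.6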
# A streamlined function to automate the process
# Data partitioning -> CV -> Final model -> Test
wrnet_pipeline <- function(df, k = 10, prop = 0.8, lambda = NULL, cox = FALSE, ...){

  # Split data into training and test sets
  split <- wr_split(df, prop = prop)
  df_train <- split$df_train
  df_test <- split$df_test

  # Cross-validation
  cv_results <- cv_wrnet(df_train$id, df_train$time, df_train$status,
                         df_train |> select(-c(id, time, status)),
                         k = k, lambda = lambda, ...)
  # Fit final model
  lambda_final <- cv_results |>
    filter(concordance == max(concordance)) |>
    pull(lambda)
  fit <- wrnet(df_train$id, df_train$time, df_train$status,
               df_train |> select(-c(id, time, status)),
               lambda = lambda_final, ...)
  beta <- as.vector(fit$beta)
  varnames <- rownames(fit$beta)
  # Test the model
  test_results <- test_wrnet(fit, df_test)
  concordance <- test_results$concordance

  result <- list(varnames = varnames, beta = beta, concordance = concordance)

  if (cox) {
    # Fit regularized Cox model by glmnet
    # Subset to first events
    df_train_tfe <- df_train |>
      group_by(id) |>
      arrange(time) |>
      slice(1) |>
      mutate(
        status = status > 0,
        time = if_else(time <= 0, 0.0001, time) # fix 0 times
      ) |>
      ungroup()
    # Fit regularized Cox regression model
    # 10-fold CV
    cox_cvfit <- cv.glmnet(
      x = df_train_tfe |> select(-c(id, time, status)) |> as.matrix(),
      y = df_train_tfe |> select(time, status) |> as.matrix(),
      family = "cox",
      type.measure = "C", # C-index for CV
      ...
    )
    cox_beta <- as.vector(coef(cox_cvfit, s = "lambda.min"))
    # Trick test_wrnet() to get risk score for Cox model
    cox_cvfit$beta <- - cox_beta
    # Test the final model on test set
    cox_test_result <- cox_cvfit |> test_wrnet(df_test)
    # Test C-index
    cox_concordance <- cox_test_result$concordance

    result$cox_beta <- cox_beta
    result$cox_concordance <- cox_concordance
  }
  result
}
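An end-to-end call on the gbc data from WR (illustrative; assumes the functions above are sourced):

set.seed(123)
res <- wrnet_pipeline(gbc, k = 10, prop = 0.8, cox = TRUE)
res$concordance      # test C-indices of the win ratio model
res$cox_concordance  # test C-indices of the Cox benchmark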
# Functions for simulation studies ----------------------------------------
# Generate AR(1) correlation matrix
ar1_corr_matrix <- function(p, rho) {
  matrix(rho^abs(row(matrix(1:p, p, p)) - col(matrix(1:p, p, p))), nrow = p)
}
# Simulate n-sample from AR(1) correlation matrix
simu_Z_ar1 <- function(n, p, rho) {
  Sigma <- ar1_corr_matrix(p, rho)
  MASS::mvrnorm(n = n, mu = rep(0, p), Sigma = Sigma)
}
# Simulate bivariate Gumbel random variables --------------------
generate_gb_ar1Z_data <- function(n, betaD0, betaH0, rho, lambdaH, lambdaD, kappa, lambdaC, fmin, fmax) {
  # Censoring
  # Expn(lambdaC) \wedge U(fmin, fmax)
  C <- pmin(rexp(n, lambdaC), (fmax - fmin) * runif(n) + fmin)
  # Simulate bivariate Gumbel random variables
  outcomes <- rgumbel(n, alpha = kappa, dim = 2, method = 1)

  # Simulate covariates (AR(1) multivariate standard normal)
  p <- length(betaD0)
  Z <- as_tibble(simu_Z_ar1(n, p, rho))

  Zmat <- as.matrix(Z)

  # Generate outcomes based on conditional dist
  U1 <- outcomes[, 1]
  U2 <- outcomes[, 2]
  nonfatal_time <- -log(U1) / (lambdaH * exp(- Zmat %*% betaH0))
  death_time <- -log(U2) / (lambdaD * exp(- Zmat %*% betaD0))
  # Combine data to wide format
  df_wide <- tibble(
    id = 1:n,
    nonfatal_time = as.vector(nonfatal_time),
    death_time = as.vector(death_time),
    censor_time = C,
    Z
  )
  # Wide to long
  df <- df_wide |>
    group_by(id) |>
    reframe(
      ts = time_fun(death_time, nonfatal_time, censor_time)
    ) |>
    unnest(ts) |>
    left_join(
      tibble(id = 1:n, Z),
      by = "id")
  df
}
# Helper function of generate_gb_ar1Z_data ----------------------------
time_fun <- function(death_time, nonfatal_time, censor_time) {

  X <- min(death_time, censor_time)
  delta <- (death_time <= censor_time) + 0
  if (nonfatal_time < X) {
    data <- tibble(
      time = c(nonfatal_time, X),
      status = c(2, delta)
    )
  } else {
    data <- tibble(
      time = X,
      status = delta
    )
  }
  return(data)
}
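For example (illustrative), a nonfatal event at 0.5 followed by death at 1 under censoring at 2 yields two records:

time_fun(death_time = 1, nonfatal_time = 0.5, censor_time = 2)
#> time = c(0.5, 1), status = c(2, 1)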
Simulation Studies
Simulation set-up
- Covariates: \(z = (z_{\cdot 1}, z_{\cdot 2}, \ldots, z_{\cdot p})^\T\)
- \(z_{\cdot k} \sim N(0,1)\)
- Correlation structure: AR(1) with \(\rho = 0.1\)
- Outcome model: \[\begin{equation}\tag{1}
\pr(D > s, T > t\mid z) = \exp\left(-\left[\{\exp(-\beta_D^\T z)\lambda_Ds\}^\kappa + \{\exp(-\beta_H^\T z)\lambda_Ht\}^\kappa\right]^{1/\kappa}\right),
\end{equation}\]
- \(\lambda_D = 0.1\)
- \(\lambda_H = 1\)
- \(\kappa = 1.25\)
- Censoring: \(\mbox{Un}[0.2, 4]\wedge\mbox{Expn}(\lambda_C)\)
- \(\lambda_C = 0.02\)
- Scenarios
- Same effect pattern on components \[ \beta_D=\beta_H=(\underbrace{0.5,\ldots, 0.5}_{10}, \underbrace{0,\ldots, 0}_{p - 10}). \]
- Different effect patterns on components \[\begin{align*} \beta_D&=(\underbrace{0.75,\ldots, 0.75}_{5}, \underbrace{0,\ldots, 0}_{p - 5});\\ \beta_H&=(\underbrace{0,\ldots, 0}_{5}, \underbrace{0.75,\ldots, 0.75}_{5}, \underbrace{0,\ldots, 0}_{p - 10}) \end{align*}\]
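Setting \(t = 0\) (resp. \(s = 0\)) in (1) shows that \(D\) is marginally exponential with rate \(\exp(-\beta_D^\T z)\lambda_D\) and \(T\) with rate \(\exp(-\beta_H^\T z)\lambda_H\), while \(\kappa\) controls the Gumbel-copula dependence (Kendall's \(\tau = 1 - 1/\kappa = 0.2\)); accordingly, generate_gb_ar1Z_data() transforms a uniform-margin Gumbel pair \((U_1, U_2)\) by \(-\log(U)/\text{rate}\).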
Model parameter specification
## Individual experiments
# Fix parameters
kappa <- 1.25
# hazard rates
lambdaH <- 1
lambdaD <- 0.1
# censoring
lambdaC <- 0.02
fmin <- 0.2
fmax <- 4
# AR1 correlation among covariates
rho <- 0.1
Simulation data generation
library(WR)
library(gumbel)
library(tidyverse)
source("R code/basic_functions.R")
source("R code/model_parameter_specification.R")
N <- 1000
set.seed(123)
## Function to simulate N datasets with n samples each
simulate_n <- function(N, n, betaD0, betaH0, rho, lambdaH, lambdaD, kappa, lambdaC, fmin, fmax){
  tibble(
    sim_id = 1:N
  ) %>%
    mutate(
      data = map(sim_id, ~ generate_gb_ar1Z_data(n, betaD0, betaH0, rho, lambdaH, lambdaD, kappa, lambdaC, fmin, fmax)) # Generate datasets
    )
}
p <- 20

# Generate data with different sample size n
n_list <- c(200, 500, 1000, 2000)
# ========= Two scenarios ==============
# Scenario 1: Same effect pattern for death and nonfatal ------------------
ptt0 <- c(rep(1, 10), rep(0, p - 10))
betaD0 <- 0.5 * ptt0
betaH0 <- 0.5 * ptt0

# Generate and save data
for (n in n_list){
  data <- simulate_n(N, n, betaD0, betaH0, rho, lambdaH, lambdaD, kappa, lambdaC, fmin, fmax)
  saveRDS(data, paste0("simulated_datasets/s1_data_n", n, ".rds"))
  rm(data)
}
# Scenario 2: Different effect pattern for death and nonfatal --------------
ptt1 <- c(rep(1, 5), rep(0, p - 5))
ptt2 <- c(rep(0, 5), rep(1, 5), rep(0, p - 10))
betaD0 <- 0.75 * ptt1
betaH0 <- 0.75 * ptt2

# Generate and save data
for (n in n_list){
  data <- simulate_n(N, n, betaD0, betaH0, rho, lambdaH, lambdaD, kappa, lambdaC, fmin, fmax)
  saveRDS(data, paste0("simulated_datasets/s2_data_n", n, ".rds"))
  rm(data)
}
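A quick sanity check on a single simulated dataset (illustrative; not part of the original scripts):

dat <- generate_gb_ar1Z_data(200, betaD0, betaH0, rho, lambdaH, lambdaD, kappa, lambdaC, fmin, fmax)
head(dat)          # long format: id, time, status (1 = death, 2 = nonfatal, 0 = censoring), V1-V20
table(dat$status)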
Table 1: Sensitivity and Specificity
Model fitting
library(WR)
library(tidyverse)
library(glmnet)
source("R code/basic_functions.R")
# Function to fit wrnet and Cox safely without throwing errors
wrnet_cox_pipeline <- function(x){
  wrnet_pipeline(x, cox = TRUE)
}
safe_wrnet_pipeline <- safely(wrnet_cox_pipeline)

set.seed(123)
options(warn = -1)
for (n in c(200, 500, 1000, 2000)){
  # Scenario 1
  s1dat <- readRDS(paste0("simulated_datasets/s1_data_n", n, ".rds")) |>
    filter(sim_id <= 1000)
  # Fit wrnet and Cox to each dataset
  s1wrfit <- s1dat |>
    mutate(fit = map(data, safe_wrnet_pipeline)) |>
    unnest_longer(fit) |>
    filter(fit_id == "result") |>
    select(-c(data, fit_id)) |>
    unnest_wider(fit)
  # Save fitted results
  saveRDS(s1wrfit, paste0("fitted_models/s1_wrnet_fit_n", n, ".rds"))
  # Scenario 2
  s2dat <- readRDS(paste0("simulated_datasets/s2_data_n", n, ".rds")) |>
    filter(sim_id <= 1000)
  # Fit wrnet and Cox to each dataset
  s2wrfit <- s2dat |>
    mutate(fit = map(data, safe_wrnet_pipeline)) |>
    unnest_longer(fit) |>
    filter(fit_id == "result") |>
    select(-c(data, fit_id)) |>
    unnest_wider(fit)
  # Save fitted results
  saveRDS(s2wrfit, paste0("fitted_models/s2_wrnet_fit_n", n, ".rds"))
}
Summarize sensitivity and specificity data
# Read in the fitted model results
wrfit <- bind_rows(
  readRDS(paste0("fitted_models/s1_wrnet_fit_n", 200, ".rds")) |>
    mutate(n = "n = 200", scn = "Scenario 1"),
  readRDS(paste0("fitted_models/s2_wrnet_fit_n", 200, ".rds")) |>
    mutate(n = "n = 200", scn = "Scenario 2"),
  readRDS(paste0("fitted_models/s1_wrnet_fit_n", 500, ".rds")) |>
    mutate(n = "n = 500", scn = "Scenario 1"),
  readRDS(paste0("fitted_models/s2_wrnet_fit_n", 500, ".rds")) |>
    mutate(n = "n = 500", scn = "Scenario 2"),
  readRDS(paste0("fitted_models/s1_wrnet_fit_n", 1000, ".rds")) |>
    mutate(n = "n = 1000", scn = "Scenario 1"),
  readRDS(paste0("fitted_models/s2_wrnet_fit_n", 1000, ".rds")) |>
    mutate(n = "n = 1000", scn = "Scenario 2"),
  readRDS(paste0("fitted_models/s1_wrnet_fit_n", 2000, ".rds")) |>
    mutate(n = "n = 2000", scn = "Scenario 1"),
  readRDS(paste0("fitted_models/s2_wrnet_fit_n", 2000, ".rds")) |>
    mutate(n = "n = 2000", scn = "Scenario 2")
) |>
  mutate(
    n = fct(n)
  )
# Summarize sensitivity/specificity data
sens_spec <- wrfit |>
  unnest(c(varnames, beta, cox_beta)) |>
  select(n, scn, sim_id, varnames, beta, cox_beta) |>
  mutate(varnames = fct(varnames)) |>
  group_by(n, scn, varnames) |>
  summarize(
    across(c(beta, cox_beta), ~ mean(.x != 0))
  ) |>
  pivot_longer(
    cols = c(beta, cox_beta),
    names_to = "model",
    values_to = "selection_rate"
  ) |>
  pivot_wider(
    names_from = varnames,
    values_from = selection_rate
  ) |>
  mutate(
    across(V11:V20, ~ (1 - .x)),
    across(V1:V20, ~ round(.x, 2))
  ) |>
  group_by(n, scn) |>
  arrange(model) |>
  summarize(
    across(V1:V20, \(x) str_c(x[model == "beta"], " (", x[model == "cox_beta"], ")"))
  ) |>
  pivot_longer(
    cols = V1:V20,
    names_to = "varnames",
    values_to = "value"
  ) |>
  pivot_wider(
    names_from = c(n, scn),
    values_from = value
  )
# Take a look
sens_spec
# Take sensitivity data
sens <- sens_spec |>
  filter(varnames %in% c("V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10"))
# Simplify column names
colnames(sens)[-1] <- c(rep(c("S1", "S2"), 4))
# Relabel variable names
sens$varnames <- c("$z_{\\cdot 1}$", "$z_{\\cdot 2}$", "$z_{\\cdot 3}$", "$z_{\\cdot 4}$", "$z_{\\cdot 5}$", "$z_{\\cdot 6}$", "$z_{\\cdot 7}$", "$z_{\\cdot 8}$", "$z_{\\cdot 9}$", "$z_{\\cdot 10}$")
# Output in LaTeX code
latex_code <- kable(sens, format = "latex", booktabs = TRUE, escape = FALSE)
# Print LaTeX code
cat(latex_code)
kable(sens)
# Take specificity data
spec <- sens_spec |>
  filter(varnames %in% c("V11", "V12", "V13", "V14", "V15", "V16", "V17", "V18", "V19", "V20"))
# Simplify column names
colnames(spec)[-1] <- c(rep(c("S1", "S2"), 4))
# Relabel variable names
spec$varnames <- c("$z_{\\cdot 11}$", "$z_{\\cdot 12}$", "$z_{\\cdot 13}$", "$z_{\\cdot 14}$", "$z_{\\cdot 15}$", "$z_{\\cdot 16}$", "$z_{\\cdot 17}$", "$z_{\\cdot 18}$", "$z_{\\cdot 19}$", "$z_{\\cdot 20}$")
# Output in LaTeX code
latex_code <- kable(spec, format = "latex", booktabs = TRUE, escape = FALSE)
# Print LaTeX code
cat(latex_code)
kable(spec)
Fig. 1: Boxplots of C-indices
Plot C-indices
# Create data for concordance plot
cind_plot <- wrfit |>
  unnest(c(concordance, cox_concordance),
         names_sep = "_") |>
  select(
    component = concordance_component,
    n, scn, sim_id,
    wrnet = concordance_concordance,
    cox = cox_concordance_concordance
  ) |>
  pivot_longer(
    cols = c(wrnet, cox),
    names_to = "model",
    values_to = "concordance"
  ) |>
  mutate(
    component = factor(component, levels = c("overall", "death", "nonfatal"),
                       labels = c("Overall", "Death", "Nonfatal")),
    comp_x = case_when(
      component == "Overall" ~ 1,
      component == "Death" ~ 1.8,
      component == "Nonfatal" ~ 2.2
    ),
    model = factor(model, levels = c("wrnet", "cox"), labels = c("Win ratio", "Cox"))
  )
# Create the plot
cind_plot |>
  ggplot(aes(x = comp_x, y = concordance, fill = model)) +
  geom_boxplot(data = cind_plot |> filter(component == "Overall")) +
  geom_boxplot(data = cind_plot |> filter(component != "Overall"),
               aes(group = interaction(component, model))) +
  facet_grid(n ~ scn, scales = "free_y") +
  scale_x_continuous(limits = c(0.5, 2.5), breaks = c(1, 1.8, 2.2),
                     labels = c("Overall", "Death", "Nonfatal")) +
  scale_fill_manual(values = c("#4DAF4A", "#FF7F00")) +
  coord_cartesian(ylim = c(0.45, 0.9)) +
  theme_minimal() +
  labs(
    x = NULL,
    y = "Concordance (Test)",
    fill = NULL
  ) +
  theme(
    legend.position = "top",
    panel.grid.minor.x = element_blank()
  )
# Save the plot
ggsave("figs/fig_cind.png", width = 7.5, height = 9.2)
ggsave("figs/fig_cind.eps", width = 7.5, height = 9.2)
Reproducible code example
Minimal reproducible example
library(tidyverse) # For data wrangling and visualization
library(WR) # To get the gbc data for illustration
source("R code/basic_functions.R")
# Load data
data("gbc")
df <- gbc
df
# Data partitioning
set.seed(123)
obj_split <- df |> wr_split()
# Take training and test set
df_train <- obj_split$df_train
df_test <- obj_split$df_test

# 10-fold CV
set.seed(1234)
obj_cv <- cv_wrnet(df_train$id, df_train$time, df_train$status, df_train |> select(-c(id, time, status)))
# Plot CV results (C-index vs log-lambda)
obj_cv |>
  ggplot(
    aes(x = log(lambda), y = concordance)
  ) +
  geom_point() +
  geom_line()
# Optimal lambda
lambda_opt <- obj_cv$lambda[which.max(obj_cv$concordance)]
# Final model
final_fit <- wrnet(df_train$id, df_train$time, df_train$status, df_train |> select(-c(id, time, status)), lambda = lambda_opt)
# Estimated coefficients
final_fit$beta
#> 8 x 1 sparse Matrix of class "dgCMatrix"
#>                      s0
#> hormone     0.306026364
#> age         0.003111462
#> menopause   .
#> size       -0.007720497
#> grade      -0.285511701
#> nodes      -0.082227827
#> prog_recp   0.001861367
#> estrg_recp  .
# Variable importance plot
final_fit |>
  vi_wrnet() |>
  vip()
# Test the final model on test set ----------------------------
test_result <- final_fit |> test_wrnet(df_test)
# Overall and event-specific C-indices
test_result$concordance
#> # A tibble: 3 × 2
#>   component concordance
#>   <chr>           <dbl>
#> 1 death           0.724
#> 2 nonfatal        0.607
#> 3 overall         0.664
A Real Example
Data preparation
Load and clean data on entire cohort
# Load the data
# Dataset non-public
# Can file a request to NHLBI BioLINCC: https://biolincc.nhlbi.nih.gov/studies/hf-action/
tmp0 <- read_csv("HF-ACTION/hf_action_base.csv",
                 na = "NA")
# Clean up data
df0 <- tmp0 |>
  mutate(
    patid = factor(patid),
    across(c(creatnc, totcholc, hema1cc, hemogloc), parse_number),
    sex = factor(sex, levels = c(1, 2), labels = c("Male", "Female")),
    racec = factor(racec, levels = c(1, 2, 3, 4), labels = c("Non-White", "White", "Non-White", "Non-White")),
    trt_ab = factor(trt_ab, levels = c(0, 1), labels = c("UC", "Training")),
    nyhacl = factor(nyhacl, levels = c(2, 3, 4), labels = c("II", "III", "IV")),
    etiology = factor(etiology, levels = c(1, 2), labels = c("Ischemic", "Non-ischemic")),
    cpxdur = pmin(cpxdur, 20),
    sixmwlkd = sixmwlkd / 100
  ) |>
  select(!where(is.character))
# Drop unused variables and convert factors to indicator variables
df <- df0 |>
  select(- c(dthhosp, cechfcmp, dhfu, cechfcfu, death, deathfu, ceccvcmp, ceccvcfu,
             cpct3m:peakvo12, # post-randomization measurements
             raceblack, raceother, nyhacl, egfrcat, bmicat, ecgcond,
             cpxdays3, com_use.x:GENDER.y)) |>
  mutate(
    female = (sex == "Female"),
    nonwhite = (racec == "Non-White"),
    trt_ab = (trt_ab == "Training"),
    etiology = (etiology == "Ischemic")
  ) |>
  select(- c(sex, racec)) |>
  # impute missing values by median
  mutate(across(where(is.numeric), ~ ifelse(is.na(.), median(., na.rm = TRUE), .)))
# This dataset contains all outcomes and covariates on the entire cohort
saveRDS(df, "df.rds")
Subset data to high-risk cohort
# Read in the full dataset
df <- readRDS("df.rds")
# Load hfaction data from rmt package
data("hfaction")
# Obtain patient IDs
df_sub <- hfaction |> group_by(patid) |> slice(1) |> ungroup() |> select(patid)
# Retrieve data for high-risk cohort
df1 <- df |> inner_join(df_sub, by = "patid")
## This is the analysis dataset
df <- df1
Data summary
# Number of unique patients
n_distinct(df$patid)
#> [1] 426
p <- ncol(df) - 3
# Outcome summary
df |>
  group_by(patid) |>
  summarize( # Subject-level characteristics
    death = any(status == 1),
    hosp = any(status == 2),
    comp = any(status > 0),
    fu_time = max(time)
  ) |>
  summarize( # Summarize across sample
    # N (%) of patients with each outcome
    across(c(death, hosp, comp), ~ paste0(sum(.x), " (", round(mean(.x) * 100, 1), "%)")),
    # Median follow-up time in years
    fu_time = median(fu_time) / 365.25
  )
Data partitioning
Partitioning into training and testing sets
## Partition data into training versus test sets
## stratified by outcome status:
## 1. Death + hosp; 2. Death only; 3. Hosp only; 4. None
# Compute outcome status
out_id <- df |>
  group_by(patid) |>
  summarize(
    out = case_when(
      any(status == 1) & any(status == 2) ~ 1,
      any(status == 1) ~ 2,
      any(status == 2) ~ 3,
      TRUE ~ 4
    )
  )
# Sample 80% of the data for training by each outcome status
set.seed(123)
out_id_train <- out_id |>
  group_by(out) |>
  sample_frac(0.8)
# Summarize outcome status in training data
out_id_train |> count(out) |>
  group_by() |>
  mutate(prop = n / sum(n))
# Partition data
df_train <- df |>
  inner_join(out_id_train, by = "patid") |> select(-out)
df_test <- df |>
  anti_join(df_train, by = "patid") |> rename(id = patid)
# Summarize outcome status in test data
out_id |>
  anti_join(df_train, by = "patid") |>
  count(out) |>
  group_by() |>
  mutate(prop = n / sum(n))
Model training, cross validation, and testing
Train and test model
source("R code/basic_functions.R")
# 10-fold CV
set.seed(1234)
# Range of penalty parameter
<- exp(seq(log(0.1), log(0.0002), length.out = 100))
lambda_range # Run cross validation
<- cv_wrnet(df_train$patid, df_train$time, df_train$status, df_train |> select(-c(patid, time, status)), lambda = lambda_range)
obj_cv
# Plot CV results (C-index vs log-lambda)
|>
obj_cv ggplot(
aes(x = log(lambda), y = concordance)
+
) geom_point() +
geom_line()
# Optimal lambda
<- obj_cv$lambda[which.max(obj_cv$concordance)]
lambda_opt
lambda_opt#> [1] 0.04152671
# Fit final model with optimal lambda
<- wrnet(df_train$patid, df_train$time, df_train$status, df_train |> select(-c(patid, time, status)), lambda = lambda_opt)
final_fit
# Test the final model on test set
<- final_fit |> test_wrnet(df_test)
test_result # Test C-index
$concordance
test_result
# Variable importance
|>
final_fit vi_wrnet() |>
vip()
Fig. 2: Proper vs naive CV via pairwise logistic model
Naive CV based on pairwise logistic regression
# Transform training data into pairwise format
pw_win_train <- pair_win_loss_transform(df_train |> rename(id = patid))

# Improper CV (this will take some time)
set.seed(1234)
pw_cv_fit <- cv.glmnet(
  x = pw_win_train |> select(-(1:4)) |> as.matrix(),
  y = pw_win_train$win,
  family = "binomial",
  type.measure = "class",
  # constrain intercept = 0
  intercept = FALSE,
  lambda = lambda_range
)
# saveRDS(pw_cv_fit, "naive_cv.rds")
# plot(pw_cv_fit)
# Collect CV data from naive and proper methods for comparison
# Naive
cv_errors_naive <- tibble(
  lambda = pw_cv_fit$lambda,
  accuracy = 1 - pw_cv_fit$cvm,
  method = "Naive"
)
# Proper
cv_errors_proper <- tibble(
  lambda = obj_cv$lambda,
  accuracy = obj_cv$concordance,
  method = "Proper"
)
# Optimal concordance from proper CV
max_concordance <- max(obj_cv$concordance)
# Transformation function for second y-axis (for improper CV)
lin_trans <- function(x, scale){
  0.5 + scale * (x - 0.5)
}
# Create plot
cv_errors_proper |>
  ggplot(aes(x = log(lambda), y = accuracy, color = method)) +
  geom_point() +
  geom_line() +
  geom_point(data = cv_errors_naive, aes(y = lin_trans(accuracy, 0.4))) +
  geom_line(data = cv_errors_naive, aes(y = lin_trans(accuracy, 0.4))) +
  geom_segment(x = log(lambda_opt), xend = log(lambda_opt), y = 0, yend = max_concordance,
               linetype = 2, alpha = 0.2) +
  annotate(
    "text",
    x = log(lambda_opt),
    y = 0.5,
    label = round(log(lambda_opt), 1),
    hjust = 1.2,
    vjust = 1,
    size = 3
  ) +
  scale_color_brewer(palette = "Set1", breaks = c("Proper", "Naive")) +
  scale_y_continuous("Concordance", breaks = c(0.5, 0.55, 0.6),
                     sec.axis = sec_axis(~ lin_trans(., 1/0.4),
                                         name = "Classification accuracy",
                                         breaks = c(0.5, 0.6, 0.7)
                     )) +
  labs(
    x = expression(log(lambda)),
    y = "Concordance",
    color = NULL
  ) +
  coord_cartesian(ylim = c(0.5, 0.61)) +
  theme_minimal() +
  theme(
    legend.position = "top",
    axis.title.y.right = element_text(color = "#E41A1C"), # Color for secondary axis title
    axis.title.y.left = element_text(color = "#377EB8"),  # Color for primary axis title
    axis.text.y.right = element_text(color = "#E41A1C"),  # Color for secondary axis text
    axis.text.y.left = element_text(color = "#377EB8"),   # Color for primary axis text
    axis.line.y.right = element_line(color = "#E41A1C"),  # Color for secondary axis line
    axis.line.y.left = element_line(color = "#377EB8")    # Color for primary axis line
  )
# Save the plot
ggsave("figs/fig_cv_errors.png", width = 7.5, height = 4)
ggsave("figs/fig_cv_errors.eps", device = cairo_ps, width = 7.5, height = 4)
Table 2: Comparison with regularized Cox model
Fit regularized Cox model
# Subset to first events
df_train_tfe <- df_train |>
  group_by(patid) |>
  arrange(time) |>
  slice(1) |>
  mutate(
    status = status > 0,
    time = if_else(time <= 0, 0.0001, time) # fix 0 times
  ) |>
  ungroup()
# Fit regularized Cox regression model
# 10-fold CV
set.seed(1234)
cox_cvfit <- cv.glmnet(
  x = df_train_tfe |> select(-c(patid, time, status)) |> as.matrix(),
  y = df_train_tfe |> select(time, status) |> as.matrix(),
  family = "cox",
  type.measure = "C" # C-index for CV
)
# saveRDS(cox_cvfit, "cox_cvfit.rds")
cox_cvfit <- readRDS("cox_cvfit.rds")
# plot(cox_cvfit)
# Trick test_wrnet() to get risk score for Cox model
cox_cvfit$beta <- - coef(cox_cvfit, s = "lambda.min")
# How many covariates are selected
sum(cox_cvfit$beta != 0)
#> 32
# Test the final model on test set
cox_test_result <- cox_cvfit |> test_wrnet(df_test)
# Test C-index
# Combine with wrnet model
tbl_cind <- cox_test_result$concordance |>
  mutate(
    model = "Cox",
    .before = 1
  ) |>
  bind_rows(
    test_result$concordance |>
      mutate(
        model = "Win ratio",
        .before = 1
      )
  ) |>
  mutate(
    component = str_to_title(component),
    concordance = round(concordance, 3)
  ) |>
  pivot_wider(
    names_from = model,
    values_from = concordance
  )
# Output to LaTeX
tbl_cind |>
  kable(format = "latex", booktabs = TRUE, escape = FALSE)
Fig. 4: Variable importance comparison
Variable importance comparison
# Trick vi_wrnet() to get variable importance for Cox model
cox_cvfit$Zn <- final_fit$Zn
# Plot vip for Cox
cox_cvfit |>
  vi_wrnet() |>
  vip(k = 20)
# Collect vi data from Cox and win ratio models
vi_cox <- cox_cvfit |>
  vi_wrnet()
# Win ratio counterpart (implied by the references to vi_wr below)
vi_wr <- final_fit |>
  vi_wrnet()
# Combine with wrnet model
vis <- tibble(
  vars = vi_cox$vars[1:20],
  importance = vi_cox$importance[1:20],
  model = "Cox"
) |>
  bind_rows(
    tibble(
      vars = vi_wr$vars[1:20],
      importance = vi_wr$importance[1:20],
      model = "Win ratio"
    )
  )
# Intersection of top 20 variables
vi_both <- vis |>
  count(vars) |>
  filter(
    n == 2
  ) |>
  select(-n)
# Order variables by importance in win ratio model
vis |>
  filter(model == "Win ratio") |>
  arrange(desc(importance)) |>
  right_join(vi_both, by = "vars") |>
  pull(vars)
# Format variable names
# (uvars, defined in the authors' source file, lists the union of the two
# top-20 variable sets in the order matching the labels below)
vis1 <- vis |>
  mutate(
    vars = factor(vars, levels = uvars$vars,
                  labels =
                    c("KCCQ sympt freq",
                      "Valve surgery",
                      "Female vs male",
                      "Education level",
                      "KCCQ sympt stability < 50",
                      "KCCQ social limitation",
                      "MV peak E/A velocity",
                      "VT percent max VO2",
                      "Biplane LVEF",
                      "Loop diuretic dose",
                      "Left atrial dimension",
                      "Both diuretics",
                      "Work in 6MW",
                      "Height",
                      "IVCD ventri conduction",
                      "Mitral regurgit grade 3+",
                      "Systolic blood pressure",
                      "Weber class",
                      "MV peak E velocity",
                      "Mitral regurgit grade 6",
                      "Best LVEF",
                      "NYHA class",
                      "CPX heart rate",
                      "Mitral regurgit pct",
                      "On loop diuretic",
                      "Mitral regurgit grade \u2264 2",
                      "Peak VO2"
                    )
    )
  )
# Cox plot
vip_cox <- vis1 |>
  filter(model == "Cox") |>
  ggplot(
    aes(y = reorder(vars, importance), x = importance)
  ) +
  geom_col(width = 0.8) +
  labs(
    y = NULL,
    x = "Importance",
    title = "Cox"
  ) +
  scale_x_continuous(expand = expansion(c(0, 0.05))) +
  theme_minimal() +
  theme(
    # center title
    plot.title = element_text(hjust = 0.5, size = 11),
    panel.grid.major.y = element_blank()
  )
# WR plot
vip_wr <- vis1 |>
  filter(model == "Win ratio") |>
  ggplot(
    aes(y = reorder(vars, importance), x = importance)
  ) +
  geom_col(width = 0.8) +
  labs(
    y = NULL,
    x = "Importance",
    title = "Win ratio"
  ) +
  scale_x_continuous(expand = expansion(c(0, 0.05)),
                     breaks = c(0, 0.05, 0.1, 0.15)) +
  theme_minimal() +
  theme(
    # center title
    plot.title = element_text(hjust = 0.5, size = 11),
    panel.grid.major.y = element_blank()
  )
# Combine plots
vip_cox + vip_wr
# Save figure
ggsave("figs/fig_vip.png", width = 8, height = 6)
ggsave("figs/fig_vip.eps", device = cairo_ps, width = 8, height = 6)
Fig. 3: Risk prediction assessment
Risk prediction assessment
# Group outcomes on test set
df_test_risk_score <- df_test |>
  group_by(
    id
  ) |>
  summarize(
    out = case_when(
      any(status == 1) ~ "Death",
      any(status == 2) ~ "Hosp alone",
      TRUE ~ "Event-free"
    ),
    out = factor(out, levels = c("Event-free", "Hosp alone", "Death"))
  ) |>
  left_join(
    test_result$score_test |> select(id, risk_score)
  ) |>
  left_join(
    cox_test_result$score_test |> select(id, risk_score) |> rename(risk_score_cox = risk_score)
  ) |>
  # compute the percentiles of risk_score
  mutate(
    risk_score_pct = rank(risk_score) / n(),
    risk_score_cox_pct = rank(risk_score_cox) / n()
  )
# Compute R-squared between the two risk scores
R2 <- cor(df_test_risk_score$risk_score, df_test_risk_score$risk_score_cox)^2

# Left panel: correlation of risk scores
fig_rs_corr <- df_test_risk_score |>
  ggplot(
    aes(x = risk_score_pct, y = risk_score_cox_pct, color = out)
  ) +
  geom_point(size = 4) +
  # Annotate R-squared
  annotate(
    "text",
    x = 0.15,
    y = 0.95,
    label = bquote(R^2 == .(round(R2, 2))),
    size = 4
  ) +
  geom_abline(linetype = 2) +
  labs(
    x = "Win-ratio risk score percentile",
    y = "Cox risk score percentile",
    color = NULL
  ) +
  # use a gradient color scale
  scale_color_brewer(palette = "Reds") +
  scale_x_continuous(labels = scales::percent) +
  scale_y_continuous(labels = scales::percent) +
  coord_fixed() +
  theme_minimal() +
  theme(
    legend.position = "top"
  )
# Right panel: boxplot of risk scores
fig_rs_box <- df_test_risk_score |>
  select(-c(risk_score, risk_score_cox)) |>
  pivot_longer(
    cols = c(risk_score_pct, risk_score_cox_pct),
    names_to = "model",
    values_to = "score"
  ) |>
  mutate(
    model = factor(model, levels = c("risk_score_cox_pct", "risk_score_pct"),
                   labels = c("Cox", "Win ratio"))
  ) |>
  ggplot(
    aes(x = model, y = score, fill = out)
  ) +
  geom_boxplot() +
  # use a gradient color scale for fill
  scale_fill_brewer(palette = "Reds") +
  scale_y_continuous(labels = scales::percent) +
  labs(
    x = NULL,
    y = "Predicted risk score percentile",
    fill = NULL
  ) +
  theme_minimal() +
  theme(
    legend.position = "top",
    axis.text.x = element_text(size = 11)
  )
# Combine two panels
fig_preds <- fig_rs_corr + fig_rs_box
# Print figure
fig_preds
# Save figure
ggsave("figs/fig_preds.png", fig_preds, width = 7.5, height = 4)
ggsave("figs/fig_preds.eps", fig_preds, width = 7.5, height = 4)
Table 3: Inference of win ratio model on entire dataset
Inference of win ratio model on entire dataset
# Selected covariates
# List of variable names
df <- df_sub |> left_join(df0, by = "patid")
df_Z <- df |>
  select(any_of(vi_wr$vars[1:20]))
# Fit the model
# (pwreg1(): unpenalized win ratio regression fitter from the authors' source file)
pw_obj <- pwreg1(df$patid, df$time, df$status, df_Z)

beta <- pw_obj$beta
se <- sqrt(diag(pw_obj$Var))

# Rescale loopdose & walkwork by 100
beta[names(beta) %in% c("loopdose", "walkwork")] <- beta[names(beta) %in% c("loopdose", "walkwork")] * 100
se[names(se) %in% c("loopdose", "walkwork")] <- se[names(se) %in% c("loopdose", "walkwork")] * 100
# Construct win ratio, 95% CI, and p-value
wr <- exp(beta)
wr_up <- exp(beta + 1.96 * se)
wr_low <- exp(beta - 1.96 * se)
pval <- 2 * pnorm(-abs(beta / se))
# Tabulate
tbl_inf <- tibble(
  vars = vi_wr$vars[1:20],
  beta = beta,
  se = se,
  wr = round(wr, 2),
  wr_low = round(wr_low, 2),
  wr_up = round(wr_up, 2),
  pval = round(pval, 3)
) |>
  # format
  mutate(
    `Win ratio` = wr,
    `95% CI` = str_c("[", wr_low, ", ", wr_up, "]")
  ) |>
  select(
    vars, `Win ratio`, `95% CI`, pval
  )
# Format variable names
tbl_inf1 <- tbl_inf |>
  mutate(
    vars = factor(vars,
                  levels = c("female",
                             "EDUCAT",
                             "valvsurg",
                             "loopdiur",
                             "loopdose",
                             "bothdiur",
                             "KCCQSFB",
                             "KCCQSLSB",
                             "kccqssb1",
                             "walkwork",
                             "nyhaclbn",
                             "sbp",
                             "vtpctvo2",
                             "cpxhrr",
                             "peakvo2",
                             "eavelrat",
                             "bestlvef",
                             "lftatrdm",
                             "mitregrg",
                             "mrgra2"),
                  labels =
                    c("Female vs male",
                      "Education level (1-6)",
                      "Valve surgery (y vs n)",
                      "On loop diuretic",
                      "Loop diuretic dose (mg)",
                      "On both diuretics (y vs n)",
                      "KCCQ symptoms freq",
                      "KCCQ social limitation score",
                      "KCCQ symptom stability < 50 (y vs n)",
                      "Work in 6MW (kJ)",
                      "NYHA class (III/IV vs II)",
                      "Systolic blood pressure (mmHg)",
                      "VT percent max VO2 (%)",
                      "CPX heart rate (bpm)",
                      "Peak VO2 (mL/kg/min)",
                      "MV peak E/A velocity",
                      "Best LVEF (%)",
                      "Left atrial dimension (mm)",
                      "Mitral regurgit percent (%)",
                      "Mitral regurgit grade 2 (y vs n)")
    )
  ) |>
  arrange(vars)
# Print in LaTeX
tbl_inf1 |>
  kable(format = "latex", booktabs = TRUE, escape = FALSE)
# Print table
tbl_inf1 |> kable()