library(openxlsx)
## Warning: package 'openxlsx' was built under R version 4.2.3
library(survival)
## Warning: package 'survival' was built under R version 4.2.3
library(stats)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
set.seed(12345)
# Accumulator for the scenario-level reports returned by simu(); start empty
# so rbind() can take its column structure from the first report
final_sav <- data.frame()
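# simu() runs one scenario of num_simulations trials and returns a one-row report.
# Arguments:
#   type                   - "Blinded" or "Unblinded" interim analysis
#   alpha, beta            - two-sided type I error rate and type II error rate
#   rate1, rate2           - assumed event rates (control, treatment) used for design
#   t1                     - information fraction at the interim analysis
#   real_rate1, real_rate2 - true event rates used to generate the data
#   rec                    - recruitment length as a multiple of the follow-up time
#   CL, CU                 - conditional-power bounds of the promising zone
#   num_simulations        - number of simulated trials per scenario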
simu <- function(type, alpha, beta, rate1, rate2, t1, real_rate1, real_rate2, rec, CL, CU, num_simulations){
z_alpha <- qnorm(1 - alpha/2)
z_beta <- qnorm(1 - beta)
# Rate is the probability of developing the disease event within 5 time units (for example)
rate <- (rate1+rate2)/2
diff <- rate1-rate2
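# Under an exponential model S(t) = exp(-h*t), the event probability over the
# follow-up window is r = 1 - exp(-h*follow_up), so h = -log(1-r)/follow_up and
# the hazard ratio h1/h2 reduces to log(1-rate1)/log(1-rate2) below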
hazard_ratio <- log(1-rate1)/log(1-rate2)
# Follow-up time is 5 units time
follow_up <- 5
z_total <- z_alpha + z_beta
# Assume the initial n is sized for only 65% power, leaving room for sample size re-estimation to reach >=80% power at the final analysis
z_beta_ini <- qnorm(1 - 0.35)
z_total_ini <- z_alpha + z_beta_ini
# Initial total sample size from the two-sample proportions formula:
# n per arm = (z_{1-alpha/2} + z_{1-beta})^2 * 2*pbar*(1-pbar) / (rate1-rate2)^2
n <- ceiling((z_total_ini^2 * (rate * (1 - rate) * 2)) / diff^2)*2
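# Example (design values rate1 = 0.7, rate2 = 0.3): pbar = 0.5, diff = 0.4,
# z_total_ini ~ 1.96 + 0.385 = 2.345, so n = ceiling(2.345^2 * 0.5 / 0.16) * 2 = 36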
n1 <- n/2
n2 <- n/2
final_results <- data.frame()
scen_results <- data.frame()
count_simu <- 0
n_prom <- 0
for (i in 1:num_simulations) {
# Calculate hazard rate from event rate
hr1 <- -log(1-real_rate1) / follow_up
hr2 <- -log(1-real_rate2) / follow_up
# Generate survival times
event_time1 <- rexp(n1, rate = hr1)
event_time2 <- rexp(n2, rate = hr2)
event_time <- c(event_time1, event_time2)
# Generate treatment groups
group1 <- rep(1, n1)
group2 <- rep(2, n2)
group <- c(group1, group2)
# Generate subject ID
subj <- 1:n
# Assume recruitment time is "rec" times of total follow up time
rec_length <- follow_up*rec
# Assume uniform distribution for recruitment, shuffled so recruitment order is balanced across treatment groups
rec_time <- sample(runif(n, 0, rec_length))
# Study time for each subject at event occurrence is recruitment time plus event time
study_time <- rec_time + event_time
# Final time for each subject (if no event occurred) is recruitment time plus follow-up time
final_time <- rec_time + follow_up
overall_time_interim <- ifelse(study_time<=final_time, study_time, final_time)
# Time to interim analysis with t1 as information fraction
sorted_interim <- sort(overall_time_interim)
interim_time <- sorted_interim[ceiling(n*t1)]  # ceiling() guards against non-integer n*t1
all_data <- data.frame(study_time = study_time, event_time = event_time, rec_time = rec_time, group = group, interim_time = interim_time, final_time = final_time, subj=subj, overall_time_interim=overall_time_interim)
# surv_data contains subjects who have finished the full follow-up period by the interim cut, with and without events, to avoid bias
surv_data <- all_data %>% filter(final_time <= interim_time)
n_int <- nrow(surv_data)
# Compare event time against the interim cut and the full follow-up window; event indicator: 1 = event, 0 = censored
surv_data$cens <- ifelse(surv_data$study_time <= surv_data$interim_time & surv_data$study_time <= surv_data$final_time, 1, 0)
# old way of calculating pooled p only among subjects who finished whole follow-up duration at interim cut. Replaced by KM calculation below.
#p <- sum(surv_data$cens)/n_int
# i_data are subjects who have already enrolled in the study at the interim cut
i_data <- all_data %>% filter(rec_time < interim_time)
i_data$cens <- ifelse(i_data$study_time <= i_data$interim_time & i_data$study_time <= i_data$final_time, 1, 0)
# pmin() (element-wise), not min(): each subject's on-study time is the earliest of event, end of follow-up, and interim cut
i_data$time <- pmin(i_data$study_time, i_data$final_time, i_data$interim_time) - i_data$rec_time
a.pooled <- survfit(Surv(i_data$time, i_data$cens) ~ 1)
est.p <- min(a.pooled$surv)  # KM survival at the last observed time
p <- 1 - est.p               # pooled KM event probability at interim
# Solve for the blinded interim p1 and p2: given the pooled rate a = (p1+p2)/2
# and the design hazard ratio b with (1-p1) = (1-p2)^b, substituting
# p1 = 2a - p2 yields the root equation 1 - 2a + x - (1-x)^b = 0 in x = p2
equation <- function(x, a, b) {
return(1 - 2*a + x - (1 - x)^b)
}
# Given constants a and b
a <- p
b <- hazard_ratio
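# Bracket check: only call uniroot() when a sign change guarantees a root in (0, 1)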
f_lower <- equation(0, a, b)
f_upper <- equation(1, a, b)
if (!is.na(f_lower) && !is.na(f_upper) && f_lower * f_upper < 0) {
solution <- uniroot(equation, c(0, 1), a = a, b = b)
p2_b <- solution$root
p1_b <- 2*a - p2_b
} else {
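# No bracketed root: fall back to the pooled rate for both arms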
p1_b <- p
p2_b <- p
}
# Un-blinded interim, old way of calculating p1 and p2 only among subjects who finished whole follow-up duration at interim cut. Replaced by KM calculation below.
#surv_data_1 <- surv_data %>% filter(group == 1)
#surv_data_2 <- surv_data %>% filter(group == 2)
#p1_u <- sum(surv_data_1$cens)/(n_int/2)
#p2_u <- sum(surv_data_2$cens)/(n_int/2)
i_data_1 <- i_data[i_data$group == 1,]
i_data_2 <- i_data[i_data$group == 2,]
a1 <- survfit(Surv(i_data_1$time, i_data_1$cens)~1)
a2 <- survfit(Surv(i_data_2$time, i_data_2$cens)~1)
p1_u <- 1 - min(a1$surv)
p2_u <- 1 - min(a2$surv)
# Blinded interim
if (type=="Blinded") {
p1 <- p1_b
p2 <- p2_b
}
# Un-blinded interim
if (type=="Unblinded") {
p1 <- p1_u
p2 <- p2_u
}
# Clamp estimates away from 0 and 1 to keep the standard error finite
p1 <- min(max(p1, 0.001), 0.999)
p2 <- min(max(p2, 0.001), 0.999)
#if (p1 <= 0 | p2 <= 0 | p1 >= 1 | p2 >= 1) {next}
delta <- ifelse(p1 - p2 != 0, p1 - p2, 0.001)  # guard against a zero difference
# Conditional power (Lachin 2005): with B(t) = Z(t)*sqrt(t) and the drift
# estimated from the current trend, theta_hat = B(t)/t, conditional power is
# pnorm((B(t) + theta_hat*(1-t) - z_{1-alpha/2}) / sqrt(1-t)), which
# simplifies to pnorm((theta_hat - z_{1-alpha/2}) / sqrt(1-t)) as coded below
SE <- sqrt(p1*(1-p1)/(n_int/2) + p2*(1-p2)/(n_int/2))
Z_t <-(p1-p2)/SE
B_t <- Z_t * sqrt(t1)
delta_hat_t <- B_t / t1
Z_CP_t <- (delta_hat_t - z_alpha) / sqrt(1 - t1)
cp <- pnorm(Z_CP_t)
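# Illustrative check: at t1 = 0.5 with Z(t1) = 1.5, theta_hat = 1.5/sqrt(0.5) ~ 2.12
# and cp = pnorm((2.12 - 1.96)/sqrt(0.5)) ~ 0.59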
# Re-estimated total sample size to reach the target power (1 - beta), using the pooled interim rate p and interim difference delta
n_star <- ceiling((z_total^2 * (p * (1 - p) * 2)) / (abs(delta))^2)*2
# Cap the re-estimated N at twice the original n and never go below the original n
n_new <- min(n_star, 2*n)
n_new <- max(n_new, n)
# Decision rules: the interim is "promising" when conditional power lies in (CL, CU),
# the observed effect favors treatment (p1 > p2), and the re-estimated n exceeds n
if (cp > CL & cp < CU & p1 > p2 & n_new > n) {
cat <- "Promising"  # interim zone label ('cat' is a local variable, not base::cat)
n_prom <- n_prom + 1
# Recruit n_add additional subjects, split evenly between arms
n_add <- ceiling((n_new-n)/2)*2
add_event_time1 <- rexp(n_add/2, rate = hr1)
add_event_time2 <- rexp(n_add/2, rate = hr2)
add_event_time <- c(add_event_time1, add_event_time2)
add_group1 <- rep(1, n_add/2)
add_group2 <- rep(2, n_add/2)
add_group <- c(add_group1, add_group2)
add_rec_length <- rec_length*n_add/n
add_rec_time <- sample(runif(n_add, 0, add_rec_length)) + interim_time
add_study_time <- add_rec_time + add_event_time
add_final_time <- add_rec_time + follow_up
add_overall_time_interim <- ifelse(add_study_time<=add_final_time, add_study_time, add_final_time)
add_subj <- (n+1):n_new
add_data <- data.frame(study_time = add_study_time, event_time = add_event_time, rec_time = add_rec_time, group = add_group, interim_time=interim_time, final_time=add_final_time, subj=add_subj, overall_time_interim=add_overall_time_interim)
comb_data <- rbind(all_data, add_data)
final_data <- comb_data %>% subset(select=c(study_time, rec_time, group, interim_time, final_time, subj))
# event time compare with final time to get indicator: 1 = event, 0 = censored
final_data$cens <- ifelse(final_data$study_time <= final_data$final_time, 1, 0)
} else {
n_new <- n
final_data <- all_data %>% subset(select=c(study_time, rec_time, group, interim_time, final_time, subj))
# event time compare with final time: 1 = event, 0 = censored
final_data$cens <- ifelse(final_data$study_time <= final_data$final_time, 1, 0)
if (cp <= CL | p1 <= p2) {cat <- "Unfavorable"} else {cat <- "Favorable"}
}
count_simu <- count_simu + 1
# Cross-tabulate group by event indicator, forcing both levels of each factor
# so zero-count cells are preserved for the indexing below
final_table <- table(factor(final_data$group, levels = c(1, 2)),
factor(final_data$cens, levels = c(0, 1)))
count_group1 <- final_table["1", "1"]
count_group2 <- final_table["2", "1"]
final_p1 <- count_group1/(n_new/2)
final_p2 <- count_group2/(n_new/2)
final_p1 <- min(max(final_p1, 0.001), 0.999)
final_p2 <- min(max(final_p2, 0.001), 0.999)
#if (final_p1 <= 0 | final_p2 <= 0 | final_p1 >= 1 | final_p2 >= 1) {next}
final_p <- (count_group1 + count_group2) / n_new
# Two-sided p-value and significance flag at the final analysis
SE_final <- sqrt(final_p1*(1-final_p1)/(n_new/2) + final_p2*(1-final_p2)/(n_new/2))
z_final <- (final_p1 - final_p2) / SE_final
p_value <- 2 * (1 - pnorm(abs(z_final)))  # abs() so both directions of the two-sided test count
hypo <- ifelse(p_value < alpha, 1, 0)
final_results <- rbind(final_results, data.frame(p = p, p1 = p1, p2 = p2, cp = cp, cat = cat, n_new = n_new, final_p1=final_p1, final_p2=final_p2, SE_final=SE_final, z_final=z_final, p_value = p_value, hypo = hypo, count_simu = count_simu, n_prom = n_prom, n_int = nrow(surv_data)))
}
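# Summarize the interim zone categories as proportions across simulations (df_t is a one-row summary, kept for inspection)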
cat_count <- table(final_results$cat)
cat_per <- round(prop.table(cat_count),digits=2)
df <- as.data.frame(cat_per)
rownames(df) <- df$Var1
df$Var1 <- NULL
df_t <- as.data.frame(t(df))
row.names(df_t) <- NULL
# Build the report only when at least one simulation completed
if (any(!is.na(final_results$count_simu))){
n_simu <- max(final_results$count_simu)
p_prom <- max(final_results$n_prom)/n_simu
sig <- sum(final_results$hypo)/n_simu
nosig <- 1 - sig
report1 <- data.frame(
cp = round(mean(final_results$cp), digits = 4),
p_prom = round(p_prom, digits = 4),
N_ratio = round(mean(final_results$n_new) / n, digits = 4),
sig = round(sig, digits = 4),
nosig = round(nosig, digits = 4),
n_simu = n_simu
)
# Set row names to NULL
row.names(report1) <- NULL
report <- cbind(data.frame(type=type, rate1=rate1, rate2=rate2, real_rate1=real_rate1, real_rate2=real_rate2), report1)
return(report)
}
}
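# A quick single-scenario smoke test (illustrative parameters, small run) can be
# done before launching the full grid below, e.g.:
# simu(type = "Blinded", alpha = 0.05, beta = 0.2, rate1 = 0.7, rate2 = 0.3,
#      t1 = 0.5, real_rate1 = 0.5, real_rate2 = 0.5, rec = 2, CL = 0.2, CU = 0.8,
#      num_simulations = 10)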
# Scenario grid: for each true common event rate r (a type I error setting since
# real_rate1 = real_rate2), run both interim types with assumed control rate 0.7
# against each assumed treatment rate
for (r in seq(0.1, 0.9, by = 0.1)) {
for (ty in c("Blinded", "Unblinded")) {
for (r2 in c(0.3, 0.5, 0.2, 0.4, 0.6)) {
sav <- simu(type = ty, alpha = 0.05, beta = 0.2, rate1 = 0.7, rate2 = r2, t1 = 0.5, real_rate1 = r, real_rate2 = r, rec = 2, CL = 0.2, CU = 0.8, num_simulations = 1000)
final_sav <- rbind(final_sav, sav)
}
}
}
final_sav <- final_sav[order(final_sav$type, final_sav$rate1, final_sav$rate2, final_sav$real_rate1, final_sav$real_rate2),]
colnames(final_sav) <- c("Type", "Control Rate Assumption", "Treatment Rate Assumption", "Actual Control Rate", "Actual Treatment Rate", "Conditional Power", "Probability of Increasing Sample Size", "New N vs Old N Ratio", "Significant at Final", "Non-significant at Final", "Number of Valid Simulations")
directory_path <- "C:/Users/szhuo/Box/!Personal Workspace - Shuqiong Zhuo/PhD/final_CP/final_data"
file_path <- file.path(directory_path, "HR_CP_b_u_t50_type1_error_10000.xlsx")
if (file.exists(file_path)) {
wb <- loadWorkbook(file_path)
} else {
wb <- createWorkbook()
}
# writeData() requires the target sheet to exist; add it if missing
if (!("Overall" %in% names(wb))) {addWorksheet(wb, "Overall")}
writeData(wb, sheet = "Overall", final_sav)
saveWorkbook(wb, file = file_path, overwrite = TRUE)
cat("Excel file saved to:", file_path)
## Excel file saved to: C:/Users/szhuo/Box/!Personal Workspace - Shuqiong Zhuo/PhD/final_CP/final_data/HR_CP_b_u_t50_type1_error_10000.xlsx