library(openxlsx)
## Warning: package 'openxlsx' was built under R version 4.2.3
library(survival)
## Warning: package 'survival' was built under R version 4.2.3
library(stats)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
set.seed(12345)

final_sav <- data.frame(
  p = numeric(0),
  p1 = numeric(0),
  p2 = numeric(0),
  cp = numeric(0),
  p_prom = numeric(0),
  N_ratio = numeric(0),
  sig = numeric(0),
  nosig = numeric(0),
  n_simu = numeric(0)
)

simu <- function(type, alpha, beta, rate1, rate2, t1, real_rate1, real_rate2, rec, CL, CU, num_simulations){
z_alpha <- qnorm(1 - alpha/2)
z_beta <- qnorm(1 - beta)
# Rate is the probability to develop disease event in 5 units time (for example)
rate <- (rate1+rate2)/2
diff <- rate1-rate2
hazard_ratio <- log(1-rate1)/log(1-rate2)
# Follow-up time is 5 units time
follow_up  <- 5
z_total <- z_alpha + z_beta

# Assume at the initiation, only 65% power is reached with initial n, leaves room for sample size re-estimation to reach >=80% power at the final
z_beta_ini <- qnorm(1 - 0.35)
z_total_ini <- z_alpha + z_beta_ini

# MATH formula: Calculate initial sample size 
n <- ceiling((z_total_ini^2 * (rate * (1 - rate) * 2)) / diff^2)*2

n1 <- n/2
n2 <- n/2
final_results <- data.frame()
scen_results <- data.frame()
count_simu <- 0
n_prom <- 0

for (i in 1:num_simulations) {
# Calculate hazard rate from event rate
hr1 <- -log(1-real_rate1) / follow_up
hr2 <- -log(1-real_rate2) / follow_up
# Generate survival times
event_time1 <- rexp(n1, rate = hr1)
event_time2 <- rexp(n2, rate = hr2)
event_time <- c(event_time1, event_time2)
# Generate treatment groups
group1 <- rep(1, n1)
group2 <- rep(2, n2)
group <- c(group1, group2)
# Generate subject ID
subj <-1:n

# Assume recruitment time is "rec" times of total follow up time 
rec_length <- follow_up*rec
# Assume uniform distribution for recruitment, and scramble the order for even among treatment groups
rec_time <- sample(runif(n, 0, rec_length))
# Study time for each subject at event occurrence is recruitment time plus event time
study_time <- rec_time + event_time
# Final time for each subject (if no event occrurred) is recruitment time plus follow-up time
final_time <- rec_time + follow_up   
overall_time_interim <- ifelse(study_time<=final_time, study_time, final_time)

# Time to interim analysis with t1 as information fraction
sorted_interim <- sort(overall_time_interim)
interim_time <- sorted_interim[n*t1]

all_data <- data.frame(study_time = study_time, event_time = event_time, rec_time = rec_time, group = group, interim_time = interim_time, final_time = final_time, subj=subj, overall_time_interim=overall_time_interim)

# surv_data is subjects who finishes total follow-up time at interim cut, which includes subjects with and without events, to avoid bias
surv_data <- all_data %>% filter(final_time <= interim_time)
n_int <- nrow(surv_data)

# Compare with interim time and total follow-up time, for event indicator: 1 = event, 0 = censored
surv_data$cens <- ifelse(surv_data$study_time <= surv_data$interim_time & surv_data$study_time <= surv_data$final_time, 1, 0)

# old way of calculating pooled p only among subjects who finished whole follow-up duration at interim cut. Replaced by KM calculation below.
#p <- sum(surv_data$cens)/n_int

# i_data are subjects who have already enrolled in the study at the interim cut
i_data <- all_data %>% filter(rec_time < interim_time)

i_data$cens <- ifelse(i_data$study_time <= i_data$interim_time & i_data$study_time <= i_data$final_time, 1, 0)
i_data$time <- min(i_data$study_time, i_data$final_time, i_data$interim_time)-i_data$rec_time

a.pooled=survfit(Surv(i_data$time, i_data$cens)~1) 
est.p=min(a.pooled$surv)
p <- 1-est.p


# Define the function to solve blinded interim p1 and p2
equation <- function(x, a, b) {
  return(1 - 2*a + x - (1 - x)^b)
}

# Given constants a and b
a <- p  
b <- hazard_ratio   

f_lower <- equation(0, a, b)
f_upper <- equation(1, a, b)

if (!is.na(f_lower) && !is.na(f_upper) && f_lower * f_upper < 0) {
  solution <- uniroot(equation, c(0, 1), a = a, b = b)
  p2_b <- solution$root
  p1_b <- 2*a - p2_b
} else {
  p1_b <- p
  p2_b <- p
}

# Un-blinded interim, old way of calculating p1 and p2 only among subjects who finished whole follow-up duration at interim cut. Replaced by KM calculation below.
#surv_data_1 <- surv_data %>% filter(group == 1)
#surv_data_2 <- surv_data %>% filter(group == 2)
#p1_u <- sum(surv_data_1$cens)/(n_int/2)
#p2_u <- sum(surv_data_2$cens)/(n_int/2)


i_data_1 <- i_data[i_data$group == 1,]
i_data_2 <- i_data[i_data$group == 2,]

a1 <- survfit(Surv(i_data_1$time, i_data_1$cens)~1) 
a2 <- survfit(Surv(i_data_2$time, i_data_2$cens)~1) 

p1_u <- 1 - min(a1$surv)
p2_u <- 1 - min(a2$surv)


# Blinded interim
if (type=="Blinded") {
p1 <-p1_b
p2 <-p2_b
}

# Un-blinded interim
if (type=="Unblinded") {
p1 <-p1_u
p2 <-p2_u
}

p1 <- max(p1, 0.001)
p1 <- min(p1, 0.999)
p2 <- max(p2, 0.001)
p2 <- min(p2, 0.999)

#if (p1 <= 0 | p2 <= 0 | p1 >= 1 | p2 >= 1) {next}
delta <- ifelse((p1 - p2!=0), p1-p2, 0.001)

# MATH formula: calculate conditional power (Lachin 2005)
SE <- sqrt(p1*(1-p1)/(n_int/2) + p2*(1-p2)/(n_int/2))
Z_t <-(p1-p2)/SE
B_t <- Z_t * sqrt(t1)
delta_hat_t <- B_t / t1
Z_CP_t <- (delta_hat_t - z_alpha) / sqrt(1 - t1)
cp <- pnorm(Z_CP_t)

# MATH formula: calculate new sample size needed to reach 80% power, with p from interim
n_star <- ceiling((z_total^2 * (p * (1 - p) * 2)) / (abs(delta))^2)*2

# Apply max N
n_new <- min(n_star, 2*n)
n_new <- max(n_new, n)

# Decision rules
if (cp > CL & cp < CU & p1 > p2 & n_new > n) {
cat="Promising"
n_prom <- n_prom + 1

# add n
n_add <- ceiling((n_new-n)/2)*2

add_event_time1 <- rexp(n_add/2, rate = - log(1-real_rate1) / follow_up)
add_event_time2 <- rexp(n_add/2, rate = - log(1-real_rate2) / follow_up)
add_event_time <- c(add_event_time1, add_event_time2)

add_group1 <- rep(1, n_add/2)
add_group2 <- rep(2, n_add/2)
add_group <- c(add_group1, add_group2)

add_rec_length <- rec_length*n_add/n
add_rec_time <- sample(runif(n_add, 0, add_rec_length)) + interim_time
add_study_time <- add_rec_time + add_event_time
add_final_time <- add_rec_time +  follow_up
add_overall_time_interim <- ifelse(add_study_time<=add_final_time, add_study_time, add_final_time)

add_subj <- (n+1):n_new

add_data <- data.frame(study_time = add_study_time, event_time = add_event_time, rec_time = add_rec_time, group = add_group, interim_time=interim_time, final_time=add_final_time, subj=add_subj, overall_time_interim=add_overall_time_interim)

comb_data <- rbind(all_data, add_data)

final_data <- comb_data %>% subset(select=c(study_time, rec_time, group, interim_time, final_time, subj))

# event time compare with final time to get indicator: 1 = event, 0 = censored
final_data$cens <- ifelse(final_data$study_time <= final_data$final_time, 1, 0)
} 
else {
n_new=n
final_data <- all_data %>% subset(select=c(study_time, rec_time, group, interim_time, final_time, subj))
# event time compare with final time: 1 = event, 0 = censored
final_data$cens <- ifelse(final_data$study_time <= final_data$final_time, 1, 0)

if (cp <= CL | p1 <= p2) {cat="Unfavorable"} 
else {cat="Favorable"}
}

count_simu <- count_simu + 1

final_table_ini <- table(final_data$group, final_data$cens)

# Convert the table to a data frame
df_final_table <- as.data.frame(final_table_ini)

# Get the unique groups
unique_groups <- unique(final_data$group)

# Check and add missing levels
if (!"0" %in% df_final_table$Var2) {
  missing_level_0 <- data.frame(Var1 = unique_groups, Var2 = "0", Freq = 0)
  df_final_table <- rbind(df_final_table, missing_level_0)
}
if (!"1" %in% df_final_table$Var2) {
  missing_level_1 <- data.frame(Var1 = unique_groups, Var2 = "1", Freq = 0)
  df_final_table <- rbind(df_final_table, missing_level_1)
}

# Convert back to a table
final_table <- xtabs(Freq ~ Var1 + Var2, data = df_final_table)

count_group1 <- final_table["1", "1"]
count_group2 <- final_table["2", "1"]
final_p1 <- count_group1/(n_new/2)
final_p2 <- count_group2/(n_new/2)

final_p1 <- max(final_p1, 0.001)
final_p1 <- min(final_p1, 0.999)
final_p2 <- max(final_p2, 0.001)
final_p2 <- min(final_p2, 0.999)

#if (final_p1 <= 0 | final_p2 <= 0 | final_p1 >= 1 | final_p2 >= 1) {next}

final_p <- (count_group1 + count_group2) / n_new

# MATH formula: calculate p value and statistical  significance at final analysis
SE_final <- sqrt(final_p1*(1-final_p1)/(n_new/2) + final_p2*(1-final_p2)/(n_new/2))
z_final <- (final_p1 - final_p2) / SE_final
p_value <- 2 * (1 - pnorm(z_final))
hypo <- ifelse(p_value < alpha, 1, 0)

final_results <- rbind(final_results, data.frame(p = p, p1 = p1, p2 = p2, cp = cp, cat = cat, n_new = n_new, final_p1=final_p1, final_p2=final_p2, SE_final=SE_final, z_final=z_final, p_value = p_value, hypo = hypo, count_simu = count_simu, n_prom = n_prom, n_int = nrow(surv_data)))

}

cat_count <- table(final_results$cat) 
cat_per <- round(prop.table(cat_count),digits=2)
df <- as.data.frame(cat_per)
rownames(df) <- df$Var1
df$Var1 <- NULL
df_t <- as.data.frame(t(df))
row.names(df_t) <- NULL

if (any(!is.na(final_results$count_simu))){
n_simu <- max(final_results$count_simu)
p_prom <- max(final_results$n_prom)/n_simu
sig <- sum(final_results$hypo)/n_simu
nosig <- 1 - sig

  report1 <- data.frame(
    cp = round(mean(final_results$cp), digits = 4),
    p_prom = round(p_prom, digits = 4),
    N_ratio = round(mean(final_results$n_new) / n, digits = 4),
    sig = round(sig, digits = 4),
    nosig = round(nosig, digits = 4),
    n_simu = n_simu
  )
  
  # Set row names to NULL
  row.names(report1) <- NULL
  
  report <- cbind(data.frame(type=type, rate1=rate1, rate2=rate2, real_rate1=real_rate1, real_rate2=real_rate2), report1)
  return(report)
}
}

  for (r in seq(0.1, 0.9, by = 0.1)) {
    sav1 <- simu(type = "Blinded", alpha=0.05, beta=0.2, rate1=0.7, rate2=0.3, t1=0.5, real_rate1=r, real_rate2=r, rec=2, CL=0.2, CU=0.8, num_simulations=1000)
    final_sav <- rbind(final_sav, sav1)
    sav2 <- simu(type = "Blinded", alpha=0.05, beta=0.2, rate1=0.7, rate2=0.5, t1=0.5, real_rate1=r, real_rate2=r, rec=2, CL=0.2, CU=0.8, num_simulations=1000)
    final_sav <- rbind(final_sav, sav2)
    sav3 <- simu(type = "Blinded", alpha=0.05, beta=0.2, rate1=0.7, rate2=0.2, t1=0.5, real_rate1=r, real_rate2=r, rec=2, CL=0.2, CU=0.8, num_simulations=1000)
    final_sav <- rbind(final_sav, sav3)
    sav4 <- simu(type = "Blinded", alpha=0.05, beta=0.2, rate1=0.7, rate2=0.4, t1=0.5, real_rate1=r, real_rate2=r, rec=2, CL=0.2, CU=0.8, num_simulations=1000)
    final_sav <- rbind(final_sav, sav4)
    sav5 <- simu(type = "Blinded", alpha=0.05, beta=0.2, rate1=0.7, rate2=0.6, t1=0.5, real_rate1=r, real_rate2=r, rec=2, CL=0.2, CU=0.8, num_simulations=1000)
    final_sav <- rbind(final_sav, sav5)
    
    sav6 <- simu(type = "Unblinded", alpha=0.05, beta=0.2, rate1=0.7, rate2=0.3, t1=0.5, real_rate1=r, real_rate2=r, rec=2, CL=0.2, CU=0.8, num_simulations=1000)
    final_sav <- rbind(final_sav, sav6)
    sav7 <- simu(type = "Unblinded", alpha=0.05, beta=0.2, rate1=0.7, rate2=0.5, t1=0.5, real_rate1=r, real_rate2=r, rec=2, CL=0.2, CU=0.8, num_simulations=1000)
    final_sav <- rbind(final_sav, sav7)
    sav8 <- simu(type = "Unblinded", alpha=0.05, beta=0.2, rate1=0.7, rate2=0.2, t1=0.5, real_rate1=r, real_rate2=r, rec=2, CL=0.2, CU=0.8, num_simulations=1000)
    final_sav <- rbind(final_sav, sav8)
    sav9 <- simu(type = "Unblinded", alpha=0.05, beta=0.2, rate1=0.7, rate2=0.4, t1=0.5, real_rate1=r, real_rate2=r, rec=2, CL=0.2, CU=0.8, num_simulations=1000)
    final_sav <- rbind(final_sav, sav9)
    sav10 <- simu(type = "Unblinded", alpha=0.05, beta=0.2, rate1=0.7, rate2=0.6, t1=0.5, real_rate1=r, real_rate2=r, rec=2, CL=0.2, CU=0.8, num_simulations=1000)
    final_sav <- rbind(final_sav, sav10)
  }


final_sav <- final_sav[order(final_sav$type, final_sav$rate1, final_sav$rate2, final_sav$real_rate1, final_sav$real_rate2),]

colnames(final_sav) <- c("Type", "Control Rate Assumption", "Treatment Rate Assumption", "Actual Control Rate", "Actual Treatment Rate", "Conditional Power", "Probability of Increasing Sample Size", "New N vs Old N Ratio", "Significant at Final", "Non-significant at Final", "Number of Valid Simulations")

directory_path <- "C:/Users/szhuo/Box/!Personal Workspace - Shuqiong Zhuo/PhD/final_CP/final_data"
file_path <- file.path(directory_path, "HR_CP_b_u_t50_type1_error_10000.xlsx")
if (file.exists(file_path)) {
  wb <- loadWorkbook(file_path)
} else {
  wb <- createWorkbook()
}
writeData(wb, sheet = "Overall", final_sav)
saveWorkbook(wb, file = file_path, overwrite = TRUE)
cat("Excel file saved to:", file_path)
## Excel file saved to: C:/Users/szhuo/Box/!Personal Workspace - Shuqiong Zhuo/PhD/final_CP/final_data/HR_CP_b_u_t50_type1_error_10000.xlsx