library(survival)
## Warning: package 'survival' was built under R version 4.2.3
library(stats)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
set.seed(12345)

final_sav <- data.frame(
  p = numeric(0),
  p1 = numeric(0),
  p2 = numeric(0),
  cp = numeric(0),
  p_prom = numeric(0),
  N_ratio = numeric(0),
  sig = numeric(0),
  nosig = numeric(0),
  n_simu = numeric(0)
)

simu <- function(type, alpha, beta, rate1, rate2, t1, real_rate1, real_rate2, rec, CL, CU, num_simulations){
z_alpha <- qnorm(1 - alpha/2)
z_beta <- qnorm(1 - beta)
# Rate is the probability to develop disease event in 76 weeks (for example)
rate <- (rate1+rate2)/2
diff <- rate1-rate2

# Calculate total Z-score required
z_total <- z_alpha + z_beta

# Calculate required sample size 
n <- ceiling((z_total^2 * (rate * (1 - rate) * 2)) / diff^2)*2
n1 <- n/2
n2 <- n/2
final_results <- data.frame()
scen_results <- data.frame()
count_simu <- 0
n_prom <- 0

for (i in 1:num_simulations) {
# Calculate hazard rate from event rate
hr1 <- -log(1-real_rate1) / 76
hr2 <- -log(1-real_rate2) / 76
# Generate survival times
event_time1 <- rexp(n1, rate = hr1)
event_time2 <- rexp(n2, rate = hr2)
event_time <- c(event_time1, event_time2)

group1 <- rep(1, n1)
group2 <- rep(2, n2)
group <- c(group1, group2)

subj <-1:n

# Follow-up time is 76 weeks
follow_up  <- 76
# Assume recruitment time is rec rate of total follow up time (final time)
rec_length <- follow_up*rec
# Assume uniform distribution for recruitment
rec_time <- runif(n, 0, rec_length)
# Study time for each subject at event occurrence is recruitment time plus event time
study_time <- rec_time + event_time
# Final time for each subject (if no event happened) is recruitment time plus follow-up time
final_time <- rec_time + follow_up   

# Time to interim analysis (n*t1 subjects reach 76 weeks or have events)
sorted_interim <- sort(final_time)
interim_time <- sorted_interim[n*t1]

all_data <- data.frame(study_time = study_time, event_time = event_time, rec_time = rec_time, group = group, interim_time = interim_time, final_time = final_time, subj=subj)
surv_data <- all_data %>% filter(final_time <= interim_time)

# Compare with interim time for event indicator: 1 = event, 0 = censored
surv_data$cens <- ifelse(surv_data$study_time <= surv_data$interim_time, 1, 0)

n_int <- nrow(surv_data)
p <- sum(surv_data$cens)/n_int

# Un-blinded interim
surv_data_1 <- surv_data %>% filter(group == 1)
surv_data_2 <- surv_data %>% filter(group == 2)
p1_u <- sum(surv_data_1$cens)/(n_int/2)
p2_u <- sum(surv_data_2$cens)/(n_int/2)

# Blinded interim
# Calculate event rate in each treatment at interim, with control group fixed
p1_b <- rate1
p2_b <- p * 2 - p1_b


# Un-blinded interim
if (type=="Unblinded") {
if (p1_u>=1 | p2_u>=1 | p1_u<=0 | p2_u<=0) {
  next
}
p1 <-p1_u
p2 <-p2_u
}

# Blinded interim
if (type=="Blinded") {
if (p1_b<=0 | p1_b>=1| p2_b<=0 | p2_b>=1) {
  next
}
p1 <-p1_b
p2 <-p2_b
}

delta <- p1 - p2
SE <- sqrt(p1*(1-p1)/(n_int/2) + p2*(1-p2)/(n_int/2))
Z_t <-(p1-p2)/SE
# Calculate B-value
B_t <- Z_t * sqrt(t1)
# Estimate drift parameter
delta_hat_t <- B_t / t1
# Calculate conditional power
Z_CP_t <- (delta_hat_t - z_alpha) / sqrt(1 - t1)
cp <- pnorm(Z_CP_t)

# Decision rules
if (is.na(cp)) {next}
else {
if (cp > CL & cp < CU) {
cat="Promising"
n_prom <- n_prom + 1
# Calculate the required sample size 
n_star <- ceiling((z_total^2 * (p * (1 - p) * 2)) / (abs(delta))^2)*2
# Apply max N
n_new <- ifelse(n_star<=2*n, n_star, 2*n)

# add n
n_add <- ceiling((n_new-n)/2)*2

add_event_time1 <- rexp(n_add/2, rate = - log(1-p1) / 76)
add_event_time2 <- rexp(n_add/2, rate = - log(1-p2) / 76)
add_event_time <- c(add_event_time1, add_event_time2)

add_group1 <- rep(1, n_add/2)
add_group2 <- rep(2, n_add/2)
add_group <- c(add_group1, add_group2)

add_rec_length <- rec_length*n_add/n
add_rec_time <- runif(n_add, 0, add_rec_length) + interim_time
add_study_time <- add_rec_time + add_event_time
add_final_time <- add_rec_time +  follow_up

add_subj <- (n+1):n_new

add_data <- data.frame(study_time = add_study_time, event_time = add_event_time, rec_time = add_rec_time, group = add_group, interim_time=interim_time, final_time=add_final_time, subj=add_subj)

comb_data <- rbind(all_data, add_data)

final_data <- comb_data %>% subset(select=c(study_time, event_time, rec_time, group, interim_time, final_time, subj))

# event time compare with final time to get indicator: 1 = event, 0 = censored
final_data$cens <- ifelse(final_data$study_time <= final_data$final_time, 1, 0)
} 
else {
n_new=n
final_data <- all_data %>% subset(select=c(study_time, event_time, rec_time, group, interim_time, final_time, subj))

# event time compare with final time: 1 = event, 0 = censored
final_data$cens <- ifelse(final_data$study_time <= final_data$final_time, 1, 0)

if (cp <= CL) {cat="Unfavorable"} 
else if (cp >= CU) {cat="Favorable"}
}
  
count_simu <- count_simu + 1

# final and interim analysis results
final_table <- table(final_data$group, final_data$cens)
count_group1 <- final_table["1", "1"]
count_group2 <- final_table["2", "1"]
final_p1 <- count_group1/(n_new/2)
final_p2 <- count_group2/(n_new/2)
final_p <- (count_group1 + count_group2) / n_new
# Standard error
SE_final <- sqrt(final_p1*(1-final_p1)/(n_new/2) + final_p2*(1-final_p2)/(n_new/2))
# Z statistic
z_final <- (final_p1 - final_p2) / SE_final
# P-value
p_value <- 2 * (1 - pnorm(z_final))
hypo <- ifelse(p_value < alpha, 1, 0)

final_results <- rbind(final_results, data.frame(p = p, p1 = p1, p2 = p2, cp = cp, cat = cat, n_new = n_new, p_value = p_value, hypo = hypo, count_simu = count_simu, n_prom = n_prom, n_int = nrow(surv_data)))
}
}

cat_count <- table(final_results$cat) 
cat_per <- round(prop.table(cat_count),digits=2)
df <- as.data.frame(cat_per)
rownames(df) <- df$Var1
df$Var1 <- NULL
# Transpose the data frame
df_t <- as.data.frame(t(df))
row.names(df_t) <- NULL

if (any(!is.na(final_results$count_simu))){
n_simu <- max(final_results$count_simu)
p_prom <- max(final_results$n_prom)/n_simu
sig <- sum(final_results$hypo)/n_simu
nosig <- 1 - sig

  report1 <- data.frame(
    cp = round(mean(final_results$cp), digits = 2),
    p_prom = round(p_prom, digits = 2),
    N_ratio = round(mean(final_results$n_new) / n, digits = 2),
    sig = round(sig, digits = 2),
    nosig = round(nosig, digits = 2),
    n_simu = n_simu
  )
  
  # Set row names to NULL
  row.names(report1) <- NULL
  
  report <- cbind(data.frame(type=type, rate1=rate1, rate2=rate2, real_rate1=real_rate1, real_rate2=real_rate2), report1)
  return(report)
}
}

for (type in c("Unblinded", "Blinded")){
for (r1 in seq(0.1, 0.9, by = 0.1)) {
  for (r2 in seq(0.1, 0.9, by = 0.1)) {
    sav1 <- simu(type = type, alpha=0.05, beta=0.2, rate1=0.7, rate2=0.3, t1=0.5, real_rate1=r1, real_rate2=r2, rec=1, CL=0.2, CU=0.8, num_simulations=100)
    final_sav <- rbind(final_sav, sav1)
    sav2 <- simu(type = type, alpha=0.05, beta=0.2, rate1=0.7, rate2=0.5, t1=0.5, real_rate1=r1, real_rate2=r2, rec=1, CL=0.2, CU=0.8, num_simulations=100)
    final_sav <- rbind(final_sav, sav2)
  }
}
}

final_sav <- final_sav[order(final_sav$type, final_sav$rate1, final_sav$rate2, final_sav$real_rate1, final_sav$real_rate2),]

# Assign column labels
colnames(final_sav) <- c("Type", "Control Rate Assumption", "Treatment Rate Assumption", "Actual Control Rate", "Actual Treatment Rate", "Conditional Power", "Probability of Increasing Sample Size", "New N vs Old N Ratio", "Significant at Final", "Non-significant at Final", "Number of Valid Simulations")

# Specify the path where you want to save the file
file_path <- "C:/Users/szhuo/Downloads/PhD/CP2/CP_simulation_results_combined.csv"

# Save the results to a CSV file at the specified location
write.csv(final_sav, file = file_path, row.names = FALSE, na = "")