# Installing useful package
library(kableExtra)
library(survival)
library(magrittr) 
library(ggplot2)
library(ggpubr)   # Combine ggplot

下述三題檢定,皆討論單尾的情況

Problem 1

Let Group 1 survival distribution chosen were Weibull, \(S_{b,a}(t) = e^{-(t/b)^a},t>0\), with b = 1 and a = 0.5, 1, 2, and 3.
Let Group 2 survival distributions are Weibull with constant hazard ratio \(\gamma=1.73,\ 2,\ 2.25,\ 2.37\) to Group 1.
The censoring distributions were equal and uniform \(U(0,\ 2)\).

Group 1
\[ \begin{align} S_{1}(t) &= e^{-(t/b)^a},t>0 \\ \Rightarrow \lambda_{1}(t) &= \frac{d-lnS_{1}(t)}{dt} = \frac{a}{b} * \left(\frac{t}{b} \right)^{a-1},\quad where\ t>0;\ a=0.5,1,2,3;\ b=1 \\ \Rightarrow T_{1} &\sim Weibull(shape = a,\ scale = b) \end{align} \]

Group 2
\[ \begin{align} 依題意知,\frac{\lambda_{2}(t)}{\lambda_{1}(t)} &= \gamma \\ \Rightarrow \lambda_{2}(t) &= \gamma*\lambda_{1}(t) = \gamma * \frac{a}{b} * \left(\frac{t}{b} \right)^{a-1} \\ \Rightarrow \Lambda_{2}(t) &= \int_{0}^{t} \frac{\gamma a}{b} * \left(\frac{u}{b} \right)^{a-1} du = \gamma * \left(\frac{t}{b} \right)^{a} \\ \Rightarrow S_{2}(t) &= exp\{-\Lambda_{2}(t)\} = exp \left \{ -\gamma * \left(\frac{t}{b} \right)^{a} \right \} \\ \Rightarrow f_{2}(t) &= S_{2}(t)*\lambda_{2}(t) \\ &= a*\frac{\gamma^{1/a}}{b}*\left(\frac{\gamma^{1/a}*t}{b} \right)^{a-1} * exp \left \{ - \left(\frac{\gamma^{1/a}*t}{b} \right)^{a} \right \},\quad \\ &where\ t>0;\ a=0.5,1,2,3;\ b=1;\ \gamma = 1.73,2,2.25,2.37 \\ \Rightarrow T_{2} &\sim Weibull\left(shape = a,\ scale = \frac{b}{\gamma^{1/a}}\right) \end{align} \]

  1. Graph the survival function versus time for the four scenarios (the survival functions for both groups must be in the graph)
    依題意可將Group1, Group2之四種情況(four scenarios)繪製如下

    # 先寫下各Group之survival function
    S.group1 <- function(t, a, b = 1)  exp(-(t/b)^{a})
    S.group2 <- function(t, gamma, a , b = 1)  {
      exp(- gamma * (t/b)^{a})
      }
    # Plot four scenarios graphics
    g1 <- ggplot() +
      stat_function(aes(x = seq(0, 5), colour = "Group1"),
                    fun = S.group1, 
                    args = list(a = 0.5, b = 1)) +
      stat_function(aes(x = seq(0, 5), colour = "Group2"),
                    fun = S.group2, 
                    args = list(gamma = 1.73, a = 0.5, b = 1),
                    linetype = "dotted") +
      labs(subtitle = "group 1:a = 0.5, b = 1 vs. group 2:r = 1.73",
           x = "t", y = "") +
      scale_colour_discrete(name = "Groups") +
      theme(legend.position = c(0.85, 0.7), 
            legend.background = element_rect(color = "black",
                                             fill = "grey90")) + 
      theme(legend.title = element_blank()) + 
      ylim(0, 1)
    
    g2 <- ggplot() +
      stat_function(aes(x = seq(0, 5), colour = "Group1"),
                    fun = S.group1, 
                    args = list(a = 1, b = 1)) +
      stat_function(aes(x = seq(0, 5), colour = "Group2"),
                    fun = S.group2, 
                    args = list(gamma = 2, a = 1, b = 1),
                    linetype = "dotted") +
      labs(subtitle = "group 1:a = 1, b = 1 vs. group 2:r = 2", 
           x = "t", y = "") +
      scale_colour_discrete(name = "Groups") +
      theme(legend.position = c(0.85, 0.7), 
            legend.background = element_rect(color = "black",
                                             fill = "grey90")) + 
      theme(legend.title = element_blank()) + 
      ylim(0, 1)
    
    g3 <- ggplot() +
      stat_function(aes(x = seq(0, 5), colour = "Group1"),
                    fun = S.group1, 
                    args = list(a = 2, b = 1)) +
      stat_function(aes(x = seq(0, 5), colour = "Group2"),
                    fun = S.group2, 
                    args = list(gamma = 2.25, a = 2, b = 1),
                    linetype = "dotted") +
      labs(subtitle = "group 1:a = 2, b = 1 vs. group 2:r = 2.25", 
           x = "t", y = "") +
      scale_colour_discrete(name = "Groups") +
      theme(legend.position = c(0.85, 0.7), 
            legend.background = element_rect(color = "black",
                                             fill = "grey90")) + 
      theme(legend.title = element_blank()) + 
      ylim(0, 1)
    
    g4 <- ggplot() +
      stat_function(aes(x = seq(0, 5), colour = "Group1"),
                    fun = S.group1, 
                    args = list(a = 3, b = 1)) +
      stat_function(aes(x = seq(0, 5), colour = "Group2"),
                    fun = S.group2, 
                    args = list(gamma = 2.37, a = 3, b = 1),
                    linetype = "dotted") +
      labs(subtitle = "group 1:a = 3, b = 1 vs. group 2:r = 2.37", 
           x = "t", y = "") +
      scale_colour_discrete(name = "Groups") +
      theme(legend.position = c(0.85, 0.7), 
            legend.background = element_rect(color = "black",
                                             fill = "grey90")) +
      theme(legend.title = element_blank()) + 
      ylim(0, 1) 
    
    ggarrange(g1, g2, g3, g4)

  2. Consider the sample size \(n_{1}=50\) (Group 1) and \(n_{2}=50\) (Group 2) and the null hypothesis is \(H_{0}:\lambda_{1}(t) = \lambda_{2}(t),\forall t\).
    Compare the power for log-rank test statistics and Gehan’s test statistics for the four scenarios (simulation: 3500 replications).

    # 1. 寫下生成樣本之函數?
    hw3.problem1 <- function(n = 50, a, gamma){
      # Group 1
      t <- rweibull(n, shape = a, scale = 1)
      C <- runif(n, min = 0, max = 2)
      group1 <- data.frame(t, C)
      group1$Y <- apply(group1, 1, min)
      group1$d <- ifelse(group1$t <= group1$C, 1, 0 )
      group1$x <- rep(0 , n)
    
      # Group 2
      t <- rweibull(n, shape = a, scale = (1/gamma^{1/a}) )
      group2 <- data.frame(t, C)
      group2$Y <- apply(group2, 1, min)
      group2$d <- ifelse(group2$t <= group2$C, 1, 0 )
      group2$x <- rep(1 , n)
    
      # Combine
      data.hw3 <- rbind(group1, group2)
      data.hw3$SurvObj <- with(data.hw3, Surv(data.hw3$Y, data.hw3$d)) # Use Surv() to build the standard survival object
      return(data.hw3[, 3:6])
    }
    # 2. 寫下計算Power之函數
    problem1.power <- function(a, gamma , alpha = 0.05){
      # T.S be test statistic
      # Log-rank test
      T.S1 <- replicate(n = 3500, 
                        expr = {
                          data.hw3 <- hw3.problem1(a = a, gamma = gamma)
                          survdiff(data.hw3$SurvObj ~ data.hw3$x, rho = 0)$chi
                          })
      power.log.rank <- mean( T.S1 > qchisq(1-alpha, df = 1)) # Log-rank test
      # Gehan test
      T.S.2 <- replicate(n = 3500, 
                         expr = {
                           data.hw3 <- hw3.problem1(a = a, gamma = gamma)
                           survdiff(data.hw3$SurvObj ~ data.hw3$x, rho = 1)$chi
                           })
      power.Gehan <- mean( T.S.2 > qchisq(1-alpha, df = 1)) # Gehan test
      Talbe.power <- cbind(power.log.rank, power.Gehan)
      return(Talbe.power)
      }
    # 3. Compare the power for log-rank test and Gehan's test for the four scenarios
    time.start <- Sys.time()
    Table.power1 <- rbind.data.frame(problem1.power(a = 0.5, gamma = 1.73),
                                     problem1.power(a = 1, gamma = 2),
                                     problem1.power(a = 2, gamma = 2.25),
                                     problem1.power(a = 3, gamma = 2.37))
    rownames(Table.power1) <- c(paste(seq(1:4), "scenario", ""))
    
    kable(Table.power1, "html", 
          caption = "Power", col.names = c("Log-Rank", "Gehan")) %>%
      kable_styling(bootstrap_options = "striped", full_width = F)
    Power
    Log-Rank Gehan
    1 scenario 0.6040000 0.5642857
    2 scenario 0.7914286 0.7400000
    3 scenario 0.8840000 0.8245714
    4 scenario 0.8880000 0.8382857

    上表即為在各情況(scenario)下,Log-rank test和Gehan test各自對應的檢定力(power).

    time.stop <- Sys.time()
    time.stop - time.start
    ## Time difference of 1.702643 mins
  3. Describe what you find in (b).
    由(b)小題的表格可看出,第一二種情況下,Log-rank test之檢定力均較Gehan test高(第二種情況差距較不明顯),而在第三四種情況下,兩者之檢定力則幾乎無差異

Problem 2

Consider the piecewise exponential random variables which satisfy \[ \left\{ \begin{aligned} \lambda(t) = r_{1}\quad &if\quad t \leq t_{0} \\ \lambda(t) = r_{2}\quad &if\quad t > t_{0} \end{aligned} \right.\] We give below a function that transforms standard exponential random variables into a (2-piece) piecewise exponential random variables.

The algorithm can be described as:
First step: Generate \(T \sim exp(1)\)
Second step: If \(T \leq t_{0}r_{1}\) then return \(T/r_{1}\) or if \(T > t_{0}r_{1}\) then return \(t_{0}+(T-t_{0}r{1})/r_{2}\).

Consider the case: (\(t_{0}=1,r{1}=0.25,r_{2}=0.5\)) for the Group 1 and (\(t_{0}=1,r{1}=0.75,r_{2}=0.5\)) for the Group 2.
The censoring distributions were equal and uniform \(U(0,\ 2)\).

  1. Derive the survival functions for each group.

    < Proof >

    \[ \begin{align} Given: \lambda(t) &= \left\{ \begin{aligned} r_{1} \quad if\quad t \leq t_{0} \\ r_{2} \quad if\quad t > t_{0} \end{aligned} \right. \\ \Rightarrow S(t) &= exp\left\{ -\int_{0}^{t} \lambda(u) du \right\} \\ &= \left\{ \begin{aligned} exp\{-r_{1}t\} \quad if\quad t \leq t_{0} \\ exp\left\{-\left[\int_{0}^{t_{0}} r_{1} du + \int_{t_{0}}^{t}r_{2}du\right]\right\} \quad if\quad t > t_{0} \end{aligned} \right. \\ &= \left\{ \begin{aligned} exp\{-r_{1}t\} \quad if\quad t \leq t_{0} \\ exp\left\{-\left[r_{1}t_{0}+r_{2}\left(t-t_{0}\right) \right]\right\} \quad if\quad t > t_{0} \end{aligned} \right. \\ &= \left\{ \begin{aligned} exp\{-r_{1}t\} \quad if\quad t \leq t_{0} \\ exp\{-\left[t_{0}(r_{1}-r_{2})+r_{2}t\right]\} \quad if\quad t > t_{0} \end{aligned} \right. \\ \end{align} \] ?ѤW?z?ҩ??i???D,?UGroup??survival function?? \[ \begin{align} S_{1}(t) = \left\{ \begin{aligned} exp\{-0.25t\} \quad if\quad t \leq 1 \\ exp\{-[-0.25+0.5t]\} \quad if\quad t > 1 \end{aligned} \right. \ ;\quad S_{2}(t) = \left\{ \begin{aligned} exp\{-0.75t\} \quad if\quad t \leq 1 \\ exp\{-[0.25+0.5t]\} \quad if\quad t > 1 \end{aligned} \right. \\ \end{align} \]

  2. Graph the survival function versus time for this case (the survival function for both groups must be in the graph).
    由(a)小題可得各survival function.
    故各group之survival function 如下所示

    # 先寫下survival function之通式
    S.problem2 <- function(t, t0, r1, r2) {
      z <- numeric(length(t))
      for(i in 1:length(t)){
        if(t[i] <= t0){
          z[i] <- exp(-r1*t[i])
          }else{
            z[i] <- exp(-( t0 *(r1-r2) + r2*t[i] ))
          } 
        }
      return(z)
      } 
    # Plot survival function
    ggplot() +
      stat_function(aes(x = seq(0, 5, 0.01), colour = "Group1"),
                fun = S.problem2, 
                args = list(t0 = 1, r1 = 0.25, r2 = 0.5)) +
      stat_function(aes(x = seq(0, 5, 0.01), colour = "Group2"),
                fun = S.problem2, 
                args = list(t0 = 1, r1 = 0.75, r2 = 0.5)) +
      geom_vline(xintercept = 1, linetype = "dotted") +
      labs(x = "t", y = "Survival function") +
      theme(legend.position = c(0.85, 0.85), 
            legend.background = element_rect(color = "black",
                                             fill = "grey90"),
            legend.title = element_blank()) 
  3. Consider the sample size \(n_{1}=50\) (Group 1) and \(n_{2}=50\) (Group 2) and the null hypothesis is \(H_{0}:\lambda_{1}(t) = \lambda_{2}(t),\forall t\).
    Compare the power for log-rank test statistics and Gehan’s test statistics for this scase (simulation: 3500 replications).

    # 1. 寫下生成2-piece exponetntial r.vs 之函數?
    r2piece.exp <- function(n , t0, r1, r2){
      z <- numeric(n)
      t <- rexp(n, rate = 1) # lambda = 1
      for(i in 1:n){
        if(t[i] <= t0 * r1){
          z[i] <- t[i]/r1
          }else{
            z[i] <- t0 + ( (t[i] - t0 * r1)/r2 )
          }
        }
      return(z)
      } 
    # 2. 寫下生成樣本之函數
    hw3.problem2 <- function(n = 50){
      # Group 1
      t <- r2piece.exp(n = n, t0 = 1, r1 = 0.25, r2 = 0.5)
      C <- runif(n, min = 0, max = 2)
      group1 <- data.frame(t, C)
      group1$Y <- apply(group1, 1, min)
      group1$d <- ifelse(group1$t <= group1$C, 1, 0 )
      group1$x <- rep(0 , n)
    
      # Group 2
      t <- r2piece.exp(n = n, t0 = 1, r1 = 0.75, r2 = 0.5)
      group2 <- data.frame(t, C)
      group2$Y <- apply(group2, 1, min)
      group2$d <- ifelse(group2$t <= group2$C, 1, 0 )
      group2$x <- rep(1 , n)
    
      # Combine
      data.hw3 <- rbind(group1, group2)
      data.hw3$SurvObj <- with(data.hw3, 
                               Surv(data.hw3$Y, data.hw3$d)) # Use Surv() to build the standard survival object
      return(data.hw3[, 3:6])
      }
    # 3. 寫下計算Power之函數
    problem2.power <- function(alpha = 0.05){
      # T.S be test statistic
      # Log-rank test
      T.S1 <- replicate(n = 3500, 
                        expr = {
                          data.hw3 <- hw3.problem2(n = 50)
                          survdiff(data.hw3$SurvObj ~ data.hw3$x, rho = 0)$chi
                    })
      power.log.rank <- mean( T.S1 > qchisq(1-alpha, df = 1)) # Log-rank test
      # Gehan test
      T.S.2 <- replicate(n = 3500, 
                         expr = {
                           data.hw3 <- hw3.problem2(n = 50)
                           survdiff(data.hw3$SurvObj ~  data.hw3$x, rho = 1)$chi
                     })
      power.Gehan <- mean( T.S.2 > qchisq(1-alpha, df = 1)) # Gehan test
      Talbe.power <- cbind(power.log.rank, power.Gehan)
      rownames(Talbe.power) <- c("Power")
      colnames(Talbe.power) <- c("Log-rank","Gehan")
      return(Talbe.power)
      }
    # 4. Compare the power for log-rank test  and Gehan's test 
    time.start <- Sys.time()
    Table.power2 <- rbind.data.frame(problem2.power(alpha = 0.05))
    
    kable(Table.power2, "html") %>%
      kable_styling(bootstrap_options = "striped", full_width = F)
    Log-rank Gehan
    Power 0.7288571 0.7688571

    上表即為Log-rank test和Gehan test各自對應的檢定力(power).

    time.stop <- Sys.time()
    time.stop - time.start 
    ## Time difference of 19.61153 secs
  4. Describe what you find in (c).
    由(c)小題的表格可看出,Gehan test之檢定力(power)略高於Log-rank test,(雖差距不大),此乃因Gehan test強調早期療效,由(a)小題之survival function可看出,在t < 1時,兩個survival function確實不相同,故此結果合理..

Problem 3

Consider the situation similar to Problem 2 but instead of this case: (\(t_{0}=0.5,r{1}=1,r_{2}=1\)) for the Group 1 and (\(t_{0}=0.5,r{1}=1,r_{2}=2\)) for the Group 2.
The censoring distributions were equal and uniform \(U(0,\ 2)\).

  1. Derive the survival functions for each group.

    < Proof >

    ???ҩ??L?{?p?PProblem 2 (a)?p?D,?G?i?o?UGroup??survival function?? \[ \begin{align} S_{1}(t) = \left\{ \begin{aligned} exp\{-t\} \quad if\quad t \leq 0.5 \\ exp\{-t\} \quad if\quad t > 0.5 \end{aligned} \right. =exp\{-t\},\ t>0\ ;\quad S_{2}(t) = \left\{ \begin{aligned} exp\{-t\} \quad if\quad t \leq 0.5 \\ exp\{-[-0.5+2t]\} \quad if\quad t > 0.5 \end{aligned} \right. \\ \end{align} \]

  2. Graph the survival function versus time for this case (the survival function for both groups must be in the graph).
    其證明過程如同Problem 2 (a)小題
    故可得各Group之survival function為

    # Plot the survival function
    ggplot() + 
      stat_function(aes(x = seq(0, 5, 0.01), colour = "Group1"),
                    fun = S.problem2,
                    args = list(t0 = 0.5, r1 = 1, r2 = 1)) +
      stat_function(aes(x = seq(0, 5, 0.01), colour = "Group2"),
                    fun = S.problem2,
                    args = list(t0 = 0.5, r1 = 1, r2 = 2)) +
      labs(x = "t", y = "Survival function") +
      geom_vline(xintercept = 0.5, linetype = "dotted") +
      theme(legend.position = c(0.85, 0.85), 
            legend.background = element_rect(color = "black",
                                             fill = "grey90"),
            legend.title = element_blank()) 
  3. Consider the sample size \(n_{1}=50\) (Group 1) and \(n_{2}=50\) (Group 2) and the null hypothesis is \(H_{0}:\lambda_{1}(t) = \lambda_{2}(t),\forall t\).
    Compare the power for log-rank test statistics and Gehan’s test statistics for this case (simulation: 3500 replications).

    # 1. 寫下生成樣本之函數
    hw3.problem3 <- function(n = 50){
      # Group 1
      t <- r2piece.exp(n = n, t0 = 0.5, r1 = 1, r2 = 1)
      C <- runif(n, min = 0, max = 2)
      group1 <- data.frame(t, C)
      group1$Y <- apply(group1, 1, min)
      group1$d <- ifelse(group1$t <= group1$C, 1, 0 )
      group1$x <- rep(0 , n)
    
      # Group 2
      t <- r2piece.exp(n = n, t0 = 0.5, r1 = 1, r2 = 2)
      group2 <- data.frame(t, C)
      group2$Y <- apply(group2, 1, min)
      group2$d <- ifelse(group2$t <= group2$C, 1, 0 )
      group2$x <- rep(1 , n)
    
      # Combine
      data.hw3 <- rbind(group1, group2)
      data.hw3$SurvObj <- with(data.hw3, 
                               Surv(data.hw3$Y, data.hw3$d)) # use Surv() to build the standard survival object
      return(data.hw3[, 3:6])
      }
    # 2. 寫下計算Power之函數
    problem3.power <- function(alpha = 0.05){
      # T.S be test statistic
      # Log-rank test
      T.S1 <- replicate(n = 3500, 
                        expr = {
                          data.hw3 <- hw3.problem3(n = 50)
                          survdiff(data.hw3$SurvObj ~ data.hw3$x, rho = 0)$chi
                    })
      power.log.rank <- mean( T.S1 > qchisq(1-alpha, df = 1)) # log-rank test
      # Gehan test
      T.S.2 <- replicate(n = 3500, 
                         expr = {
                           data.hw3 <- hw3.problem3(n = 50)
                           survdiff(data.hw3$SurvObj ~ data.hw3$x, rho = 1)$chi
                     })
      power.Gehan <- mean( T.S.2 > qchisq(1-alpha, df = 1)) # Gehan test
      Talbe.power <- cbind(power.log.rank, power.Gehan)
      rownames(Talbe.power) <- c("Power")
      colnames(Talbe.power) <- c("Log-rank","Gehan")
      return(Talbe.power)
      }
    # 3. Compare the power for log-rank test and Gehan's test 
    time.start <- Sys.time()
    Table.power3 <- rbind.data.frame(problem3.power(alpha = 0.05))
    
    kable(Table.power3, "html") %>%
      kable_styling(bootstrap_options = "striped", full_width = F)
    Log-rank Gehan
    Power 0.2011429 0.1131429

    上表即為Log-rank test和Gehan test各自對應的檢定力(power).

    time.stop <- Sys.time()
    time.stop - time.start
    ## Time difference of 25.90167 secs
  4. Describe what you find in (c).
    由(c)小題的表格可看出,Log-rank test之檢定力(power)較Gehan test 高,此乃因Gehan test強調早期療效,由(a)小題之survival function可看出,在t < 0.5時,兩個survival function相同,故此結果合理..