# Installing useful package
library(kableExtra)
library(survival)
library(magrittr) 
library(ggplot2)
library(ggpubr)   # Combine ggplot

下述三題檢定,皆討論單尾的情況

Problem 1

Let Group 1 survival distribution chosen were Weibull, \(S_{b,a}(t) = e^{-(t/b)^a},t>0\), with b = 1 and a = 0.5, 1, 2, and 3.
Let Group 2 survival distributions are Weibull with constant hazard ratio \(\gamma=1.73,\ 2,\ 2.25,\ 2.37\) to Group 1.
The censoring distributions were equal and uniform \(U(0,\ 2)\).

Group 1
\[ \begin{align} S_{1}(t) &= e^{-(t/b)^a},t>0 \\ \Rightarrow \lambda_{1}(t) &= \frac{d-lnS_{1}(t)}{dt} = \frac{a}{b} * \left(\frac{t}{b} \right)^{a-1},\quad where\ t>0;\ a=0.5,1,2,3;\ b=1 \\ \Rightarrow T_{1} &\sim Weibull(shape = a,\ scale = b) \end{align} \]

Group 2
\[ \begin{align} 依題意知,\frac{\lambda_{2}(t)}{\lambda_{1}(t)} &= \gamma \\ \Rightarrow \lambda_{2}(t) &= \gamma*\lambda_{1}(t) = \gamma * \frac{a}{b} * \left(\frac{t}{b} \right)^{a-1} \\ \Rightarrow \Lambda_{2}(t) &= \int_{0}^{t} \frac{\gamma a}{b} * \left(\frac{u}{b} \right)^{a-1} du = \gamma * \left(\frac{t}{b} \right)^{a} \\ \Rightarrow S_{2}(t) &= exp\{-\Lambda_{2}(t)\} = exp \left \{ -\gamma * \left(\frac{t}{b} \right)^{a} \right \} \\ \Rightarrow f_{2}(t) &= S_{2}(t)*\lambda_{2}(t) \\ &= a*\frac{\gamma^{1/a}}{b}*\left(\frac{\gamma^{1/a}*t}{b} \right)^{a-1} * exp \left \{ - \left(\frac{\gamma^{1/a}*t}{b} \right)^{a} \right \},\quad \\ &where\ t>0;\ a=0.5,1,2,3;\ b=1;\ \gamma = 1.73,2,2.25,2.37 \\ \Rightarrow T_{2} &\sim Weibull\left(shape = a,\ scale = \frac{b}{\gamma^{1/a}}\right) \end{align} \]

Graph the survival function versus time for the four scenarios (the survival functions for both groups must be in the graph)
依題意可將Group1, Group2之四種情況(four scenarios)繪製如下

# 先寫下各Group之survival function
S.group1 <- function(t, a, b = 1)  exp(-(t/b)^{a})
S.group2 <- function(t, gamma, a , b = 1)  {
  exp(- gamma * (t/b)^{a})
  }

# Plot four scenarios graphics
g1 <- ggplot() +
  stat_function(aes(x = seq(0, 5), colour = "Group1"),
                fun = S.group1, 
                args = list(a = 0.5, b = 1)) +
  stat_function(aes(x = seq(0, 5), colour = "Group2"),
                fun = S.group2, 
                args = list(gamma = 1.73, a = 0.5, b = 1),
                linetype = "dotted") +
  labs(subtitle = "group 1:a = 0.5, b = 1 vs. group 2:r = 1.73",
       x = "t", y = "") +
  scale_colour_discrete(name = "Groups") +
  theme(legend.position = c(0.85, 0.7), 
        legend.background = element_rect(color = "black",
                                         fill = "grey90")) + 
  theme(legend.title = element_blank()) + 
  ylim(0, 1)

g2 <- ggplot() +
  stat_function(aes(x = seq(0, 5), colour = "Group1"),
                fun = S.group1, 
                args = list(a = 1, b = 1)) +
  stat_function(aes(x = seq(0, 5), colour = "Group2"),
                fun = S.group2, 
                args = list(gamma = 2, a = 1, b = 1),
                linetype = "dotted") +
  labs(subtitle = "group 1:a = 1, b = 1 vs. group 2:r = 2", 
       x = "t", y = "") +
  scale_colour_discrete(name = "Groups") +
  theme(legend.position = c(0.85, 0.7), 
        legend.background = element_rect(color = "black",
                                         fill = "grey90")) + 
  theme(legend.title = element_blank()) + 
  ylim(0, 1)

g3 <- ggplot() +
  stat_function(aes(x = seq(0, 5), colour = "Group1"),
                fun = S.group1, 
                args = list(a = 2, b = 1)) +
  stat_function(aes(x = seq(0, 5), colour = "Group2"),
                fun = S.group2, 
                args = list(gamma = 2.25, a = 2, b = 1),
                linetype = "dotted") +
  labs(subtitle = "group 1:a = 2, b = 1 vs. group 2:r = 2.25", 
       x = "t", y = "") +
  scale_colour_discrete(name = "Groups") +
  theme(legend.position = c(0.85, 0.7), 
        legend.background = element_rect(color = "black",
                                         fill = "grey90")) + 
  theme(legend.title = element_blank()) + 
  ylim(0, 1)

g4 <- ggplot() +
  stat_function(aes(x = seq(0, 5), colour = "Group1"),
                fun = S.group1, 
                args = list(a = 3, b = 1)) +
  stat_function(aes(x = seq(0, 5), colour = "Group2"),
                fun = S.group2, 
                args = list(gamma = 2.37, a = 3, b = 1),
                linetype = "dotted") +
  labs(subtitle = "group 1:a = 3, b = 1 vs. group 2:r = 2.37", 
       x = "t", y = "") +
  scale_colour_discrete(name = "Groups") +
  theme(legend.position = c(0.85, 0.7), 
        legend.background = element_rect(color = "black",
                                         fill = "grey90")) +
  theme(legend.title = element_blank()) + 
  ylim(0, 1) 

ggarrange(g1, g2, g3, g4)

Consider the sample size \(n_{1}=50\) (Group 1) and \(n_{2}=50\) (Group 2) and the null hypothesis is \(H_{0}:\lambda_{1}(t) = \lambda_{2}(t),\forall t\).
Compare the power for log-rank test statistics and Gehan’s test statistics for the four scenarios (simulation: 3500 replications).

# 1. 寫下生成樣本之函數?
hw3.problem1 <- function(n = 50, a, gamma){
  # Group 1
  t <- rweibull(n, shape = a, scale = 1)
  C <- runif(n, min = 0, max = 2)
  group1 <- data.frame(t, C)
  group1$Y <- apply(group1, 1, min)
  group1$d <- ifelse(group1$t <= group1$C, 1, 0 )
  group1$x <- rep(0 , n)

  # Group 2
  t <- rweibull(n, shape = a, scale = (1/gamma^{1/a}) )
  group2 <- data.frame(t, C)
  group2$Y <- apply(group2, 1, min)
  group2$d <- ifelse(group2$t <= group2$C, 1, 0 )
  group2$x <- rep(1 , n)

  # Combine
  data.hw3 <- rbind(group1, group2)
  data.hw3$SurvObj <- with(data.hw3, Surv(data.hw3$Y, data.hw3$d)) # Use Surv() to build the standard survival object
  return(data.hw3[, 3:6])
}

# 2. 寫下計算Power之函數
problem1.power <- function(a, gamma , alpha = 0.05){
  # T.S be test statistic
  # Log-rank test
  T.S1 <- replicate(n = 3500, 
                    expr = {
                      data.hw3 <- hw3.problem1(a = a, gamma = gamma)
                      survdiff(data.hw3$SurvObj ~ data.hw3$x, rho = 0)$chi
                      })
  power.log.rank <- mean( T.S1 > qchisq(1-alpha, df = 1)) # Log-rank test
  # Gehan test
  T.S.2 <- replicate(n = 3500, 
                     expr = {
                       data.hw3 <- hw3.problem1(a = a, gamma = gamma)
                       survdiff(data.hw3$SurvObj ~ data.hw3$x, rho = 1)$chi
                       })
  power.Gehan <- mean( T.S.2 > qchisq(1-alpha, df = 1)) # Gehan test
  Talbe.power <- cbind(power.log.rank, power.Gehan)
  return(Talbe.power)
  }

# 3. Compare the power for log-rank test and Gehan's test for the four scenarios
time.start <- Sys.time()
Table.power1 <- rbind.data.frame(problem1.power(a = 0.5, gamma = 1.73),
                                 problem1.power(a = 1, gamma = 2),
                                 problem1.power(a = 2, gamma = 2.25),
                                 problem1.power(a = 3, gamma = 2.37))
rownames(Table.power1) <- c(paste(seq(1:4), "scenario", ""))

kable(Table.power1, "html", 
      caption = "Power", col.names = c("Log-Rank", "Gehan")) %>%
  kable_styling(bootstrap_options = "striped", full_width = F)

Power
	Log-Rank	Gehan
1 scenario	0.6040000	0.5642857
2 scenario	0.7914286	0.7400000
3 scenario	0.8840000	0.8245714
4 scenario	0.8880000	0.8382857

上表即為在各情況(scenario)下,Log-rank test和Gehan test各自對應的檢定力(power).

time.stop <- Sys.time()
time.stop - time.start

## Time difference of 1.702643 mins

Describe what you find in (b).
由(b)小題的表格可看出,第一二種情況下,Log-rank test之檢定力均較Gehan test高(第二種情況差距較不明顯),而在第三四種情況下,兩者之檢定力則幾乎無差異

Problem 2

Consider the piecewise exponential random variables which satisfy \[ \left\{ \begin{aligned} \lambda(t) = r_{1}\quad &if\quad t \leq t_{0} \\ \lambda(t) = r_{2}\quad &if\quad t > t_{0} \end{aligned} \right.\] We give below a function that transforms standard exponential random variables into a (2-piece) piecewise exponential random variables.

The algorithm can be described as:
First step: Generate \(T \sim exp(1)\)
Second step: If \(T \leq t_{0}r_{1}\) then return \(T/r_{1}\) or if \(T > t_{0}r_{1}\) then return \(t_{0}+(T-t_{0}r{1})/r_{2}\).

Consider the case: (\(t_{0}=1,r{1}=0.25,r_{2}=0.5\)) for the Group 1 and (\(t_{0}=1,r{1}=0.75,r_{2}=0.5\)) for the Group 2.
The censoring distributions were equal and uniform \(U(0,\ 2)\).

Derive the survival functions for each group.

< Proof >

\[ \begin{align} Given: \lambda(t) &= \left\{ \begin{aligned} r_{1} \quad if\quad t \leq t_{0} \\ r_{2} \quad if\quad t > t_{0} \end{aligned} \right. \\ \Rightarrow S(t) &= exp\left\{ -\int_{0}^{t} \lambda(u) du \right\} \\ &= \left\{ \begin{aligned} exp\{-r_{1}t\} \quad if\quad t \leq t_{0} \\ exp\left\{-\left[\int_{0}^{t_{0}} r_{1} du + \int_{t_{0}}^{t}r_{2}du\right]\right\} \quad if\quad t > t_{0} \end{aligned} \right. \\ &= \left\{ \begin{aligned} exp\{-r_{1}t\} \quad if\quad t \leq t_{0} \\ exp\left\{-\left[r_{1}t_{0}+r_{2}\left(t-t_{0}\right) \right]\right\} \quad if\quad t > t_{0} \end{aligned} \right. \\ &= \left\{ \begin{aligned} exp\{-r_{1}t\} \quad if\quad t \leq t_{0} \\ exp\{-\left[t_{0}(r_{1}-r_{2})+r_{2}t\right]\} \quad if\quad t > t_{0} \end{aligned} \right. \\ \end{align} \] ?ѤW?z?ҩ??i???D,?UGroup??survival function?? \[ \begin{align} S_{1}(t) = \left\{ \begin{aligned} exp\{-0.25t\} \quad if\quad t \leq 1 \\ exp\{-[-0.25+0.5t]\} \quad if\quad t > 1 \end{aligned} \right. \ ;\quad S_{2}(t) = \left\{ \begin{aligned} exp\{-0.75t\} \quad if\quad t \leq 1 \\ exp\{-[0.25+0.5t]\} \quad if\quad t > 1 \end{aligned} \right. \\ \end{align} \]

Graph the survival function versus time for this case (the survival function for both groups must be in the graph).
由(a)小題可得各survival function.
故各group之survival function 如下所示

# 先寫下survival function之通式
S.problem2 <- function(t, t0, r1, r2) {
  z <- numeric(length(t))
  for(i in 1:length(t)){
    if(t[i] <= t0){
      z[i] <- exp(-r1*t[i])
      }else{
        z[i] <- exp(-( t0 *(r1-r2) + r2*t[i] ))
      } 
    }
  return(z)
  }

# Plot survival function
ggplot() +
  stat_function(aes(x = seq(0, 5, 0.01), colour = "Group1"),
            fun = S.problem2, 
            args = list(t0 = 1, r1 = 0.25, r2 = 0.5)) +
  stat_function(aes(x = seq(0, 5, 0.01), colour = "Group2"),
            fun = S.problem2, 
            args = list(t0 = 1, r1 = 0.75, r2 = 0.5)) +
  geom_vline(xintercept = 1, linetype = "dotted") +
  labs(x = "t", y = "Survival function") +
  theme(legend.position = c(0.85, 0.85), 
        legend.background = element_rect(color = "black",
                                         fill = "grey90"),
        legend.title = element_blank())

# 1. 寫下生成2-piece exponetntial r.vs 之函數?
r2piece.exp <- function(n , t0, r1, r2){
  z <- numeric(n)
  t <- rexp(n, rate = 1) # lambda = 1
  for(i in 1:n){
    if(t[i] <= t0 * r1){
      z[i] <- t[i]/r1
      }else{
        z[i] <- t0 + ( (t[i] - t0 * r1)/r2 )
      }
    }
  return(z)
  }

# 2. 寫下生成樣本之函數
hw3.problem2 <- function(n = 50){
  # Group 1
  t <- r2piece.exp(n = n, t0 = 1, r1 = 0.25, r2 = 0.5)
  C <- runif(n, min = 0, max = 2)
  group1 <- data.frame(t, C)
  group1$Y <- apply(group1, 1, min)
  group1$d <- ifelse(group1$t <= group1$C, 1, 0 )
  group1$x <- rep(0 , n)

  # Group 2
  t <- r2piece.exp(n = n, t0 = 1, r1 = 0.75, r2 = 0.5)
  group2 <- data.frame(t, C)
  group2$Y <- apply(group2, 1, min)
  group2$d <- ifelse(group2$t <= group2$C, 1, 0 )
  group2$x <- rep(1 , n)

  # Combine
  data.hw3 <- rbind(group1, group2)
  data.hw3$SurvObj <- with(data.hw3, 
                           Surv(data.hw3$Y, data.hw3$d)) # Use Surv() to build the standard survival object
  return(data.hw3[, 3:6])
  }

# 3. 寫下計算Power之函數
problem2.power <- function(alpha = 0.05){
  # T.S be test statistic
  # Log-rank test
  T.S1 <- replicate(n = 3500, 
                    expr = {
                      data.hw3 <- hw3.problem2(n = 50)
                      survdiff(data.hw3$SurvObj ~ data.hw3$x, rho = 0)$chi
                })
  power.log.rank <- mean( T.S1 > qchisq(1-alpha, df = 1)) # Log-rank test
  # Gehan test
  T.S.2 <- replicate(n = 3500, 
                     expr = {
                       data.hw3 <- hw3.problem2(n = 50)
                       survdiff(data.hw3$SurvObj ~  data.hw3$x, rho = 1)$chi
                 })
  power.Gehan <- mean( T.S.2 > qchisq(1-alpha, df = 1)) # Gehan test
  Talbe.power <- cbind(power.log.rank, power.Gehan)
  rownames(Talbe.power) <- c("Power")
  colnames(Talbe.power) <- c("Log-rank","Gehan")
  return(Talbe.power)
  }

# 4. Compare the power for log-rank test  and Gehan's test 
time.start <- Sys.time()
Table.power2 <- rbind.data.frame(problem2.power(alpha = 0.05))

kable(Table.power2, "html") %>%
  kable_styling(bootstrap_options = "striped", full_width = F)

	Log-rank	Gehan
Power	0.7288571	0.7688571

上表即為Log-rank test和Gehan test各自對應的檢定力(power).

time.stop <- Sys.time()
time.stop - time.start

## Time difference of 19.61153 secs

Describe what you find in (c).
由(c)小題的表格可看出,Gehan test之檢定力(power)略高於Log-rank test,(雖差距不大),此乃因Gehan test強調早期療效，由(a)小題之survival function可看出,在t < 1時,兩個survival function確實不相同,故此結果合理..

Problem 3

Consider the situation similar to Problem 2 but instead of this case: (\(t_{0}=0.5,r{1}=1,r_{2}=1\)) for the Group 1 and (\(t_{0}=0.5,r{1}=1,r_{2}=2\)) for the Group 2.
The censoring distributions were equal and uniform \(U(0,\ 2)\).

Derive the survival functions for each group.

< Proof >

???ҩ??L?{?p?PProblem 2 (a)?p?D,?G?i?o?UGroup??survival function?? \[ \begin{align} S_{1}(t) = \left\{ \begin{aligned} exp\{-t\} \quad if\quad t \leq 0.5 \\ exp\{-t\} \quad if\quad t > 0.5 \end{aligned} \right. =exp\{-t\},\ t>0\ ;\quad S_{2}(t) = \left\{ \begin{aligned} exp\{-t\} \quad if\quad t \leq 0.5 \\ exp\{-[-0.5+2t]\} \quad if\quad t > 0.5 \end{aligned} \right. \\ \end{align} \]

Graph the survival function versus time for this case (the survival function for both groups must be in the graph).
其證明過程如同Problem 2 (a)小題
故可得各Group之survival function為

# Plot the survival function
ggplot() + 
  stat_function(aes(x = seq(0, 5, 0.01), colour = "Group1"),
                fun = S.problem2,
                args = list(t0 = 0.5, r1 = 1, r2 = 1)) +
  stat_function(aes(x = seq(0, 5, 0.01), colour = "Group2"),
                fun = S.problem2,
                args = list(t0 = 0.5, r1 = 1, r2 = 2)) +
  labs(x = "t", y = "Survival function") +
  geom_vline(xintercept = 0.5, linetype = "dotted") +
  theme(legend.position = c(0.85, 0.85), 
        legend.background = element_rect(color = "black",
                                         fill = "grey90"),
        legend.title = element_blank())

# 1. 寫下生成樣本之函數
hw3.problem3 <- function(n = 50){
  # Group 1
  t <- r2piece.exp(n = n, t0 = 0.5, r1 = 1, r2 = 1)
  C <- runif(n, min = 0, max = 2)
  group1 <- data.frame(t, C)
  group1$Y <- apply(group1, 1, min)
  group1$d <- ifelse(group1$t <= group1$C, 1, 0 )
  group1$x <- rep(0 , n)

  # Group 2
  t <- r2piece.exp(n = n, t0 = 0.5, r1 = 1, r2 = 2)
  group2 <- data.frame(t, C)
  group2$Y <- apply(group2, 1, min)
  group2$d <- ifelse(group2$t <= group2$C, 1, 0 )
  group2$x <- rep(1 , n)

  # Combine
  data.hw3 <- rbind(group1, group2)
  data.hw3$SurvObj <- with(data.hw3, 
                           Surv(data.hw3$Y, data.hw3$d)) # use Surv() to build the standard survival object
  return(data.hw3[, 3:6])
  }

# 2. 寫下計算Power之函數
problem3.power <- function(alpha = 0.05){
  # T.S be test statistic
  # Log-rank test
  T.S1 <- replicate(n = 3500, 
                    expr = {
                      data.hw3 <- hw3.problem3(n = 50)
                      survdiff(data.hw3$SurvObj ~ data.hw3$x, rho = 0)$chi
                })
  power.log.rank <- mean( T.S1 > qchisq(1-alpha, df = 1)) # log-rank test
  # Gehan test
  T.S.2 <- replicate(n = 3500, 
                     expr = {
                       data.hw3 <- hw3.problem3(n = 50)
                       survdiff(data.hw3$SurvObj ~ data.hw3$x, rho = 1)$chi
                 })
  power.Gehan <- mean( T.S.2 > qchisq(1-alpha, df = 1)) # Gehan test
  Talbe.power <- cbind(power.log.rank, power.Gehan)
  rownames(Talbe.power) <- c("Power")
  colnames(Talbe.power) <- c("Log-rank","Gehan")
  return(Talbe.power)
  }

# 3. Compare the power for log-rank test and Gehan's test 
time.start <- Sys.time()
Table.power3 <- rbind.data.frame(problem3.power(alpha = 0.05))

kable(Table.power3, "html") %>%
  kable_styling(bootstrap_options = "striped", full_width = F)

	Log-rank	Gehan
Power	0.2011429	0.1131429

上表即為Log-rank test和Gehan test各自對應的檢定力(power).

time.stop <- Sys.time()
time.stop - time.start

## Time difference of 25.90167 secs

Describe what you find in (c).
由(c)小題的表格可看出,Log-rank test之檢定力(power)較Gehan test 高,此乃因Gehan test強調早期療效，由(a)小題之survival function可看出,在t < 0.5時,兩個survival function相同,故此結果合理..

Survival analysis HW3

林暐翔

2018/5/20

Problem 1

Problem 2

Problem 3