Assignment 6 Solution

Figure 1

# set up
library("sas7bdat")
library("survival")
library("survminer")
library("htmlTable") # for the table

# read data
hw6_dat1 <- read.sas7bdat("data/Assign6.sas7bdat")

## Some Data Wrangling - Create a descriptive categorical variable for treatment group: 
hw6_dat1$TRT <- with(hw6_dat1, ifelse(rx==1, "Observation", ifelse(rx==2, "Lev", "Lev+5-FU")))

hw6_recurdata <- data.frame(subset(hw6_dat1, etype==1))
hw6_deathdata <- data.frame(subset(hw6_dat1, etype==2))
hw6_recurdata2 <- data.frame(subset(hw6_recurdata, hw6_recurdata$TRT=="Observation" | hw6_recurdata$TRT=="Lev+5-FU"))

hw6_recurfit <- survfit(Surv(time, status) ~ TRT, conf.int = 0.95, data = hw6_recurdata2) 
# compute Kaplan-Meier survival estimates with survfit and Surv functions
# print(recurfit)
# the object recurfit contains the data needed to create a survival curve 
# use this in ggsurvplot function
# save results as an object mysurv to make updates later
hw6_mysurv<-ggsurvplot(hw6_recurfit,
           pval = FALSE,            # do not print p-value of the Log-Rank test
           pval.method = FALSE,     # do not print name of the test
           conf.int = TRUE,         # 95% confidence limits of the survival time
           linetype = "TRT",        # assign line type by groups
           surv.median.line = "hv", # h=horizontal v=vertical lines for the median survival time
           xlab = "Time in days",   # customize X axis label
           ylab="Proportion of Subjects without \n a Recurrence", # customize Y axis label
           break.time.by = 365,     # break X axis into 1-year time intervals
           xlim = c(0, 3285),       # manually adjust x axis range
           title = "Survival Plot of Time to Recurrence by Treatment Group with 95% CI",
           legend.title = "TRT",
           legend.labs = c("Lev+5-FU", "Observation"),
           font.main = 13,          # customize the font sizes as needed
           font.x = 13, 
           font.y = 13,
           font.tickslab = 11,
           font.legend = 11,
           risk.table = TRUE,       # add number at risk table
           risk.table.col = "TRT",  # risk table color by groups
           fontsize = 3,            # risk table fontsize
           tables.height = 0.21,    # height of the tables below the plot
           cumevents = TRUE,        # add cumulative number of events table
           cumevents.col = "TRT",   # cumulative events table color by groups
           ggtheme = theme_bw()
           )


hw6_myr2.cox <- coxph(Surv(time, status) ~ TRT, data =  hw6_recurdata2)
hw6_sumres2 <- summary(hw6_myr2.cox)  

hw6_myhr <- round(hw6_sumres2$coefficients[2], 2)      # the hazard ratio
hw6_rpval <- round(hw6_sumres2$coefficients[5], 7)     # the p-value
hw6_mypval<- ifelse(hw6_rpval<0.001, "<0.001", as.factor(hw6_rpval))   # round p-value if very small
hw6_mylower <- round(hw6_sumres2$conf.int[3], 2)       # LL of the CI
hw6_myupper <- round(hw6_sumres2$conf.int[4], 2)       # UL of the CI
hw6_myci <- paste("(", hw6_mylower, ",", hw6_myupper, ")") # create label for the CI 

hw6_mysurv$plot <- hw6_mysurv$plot+ 
  ggplot2::annotate("text", size=3,
                    x = 2800, y = 0.15, # position of the text on the graph
                    label=paste("Hazard Ratio:",hw6_myhr,"\n 95% CI: ",hw6_myci, "\n p-value:", hw6_mypval))
hw6_mysurv

## create the table

hw6_fiti2=survdiff(Surv(time, status)~TRT, data=hw6_recurdata2)
hw6_plr=1 - pchisq(hw6_fiti2$chisq, length(hw6_fiti2$n) - 1); # plr

hw6_fiti=coxph(Surv(time, status) ~ TRT, data =  hw6_recurdata2)
hw6_coefi=summary(hw6_fiti)$conf.int[c(1,3,4)]; #coefi

###  Output  ###
hw6_dati=hw6_recurdata2
hw6_ni0=c(sum(hw6_dati$TRT=="Lev+5-FU"), sum(hw6_dati$TRT=="Observation"))
hw6_ni1=c(sum(hw6_dati$TRT=="Lev+5-FU" & hw6_dati$status==1), sum(hw6_dati$TRT=="Observation" & hw6_dati$status==1))
hw6_ni2=c(sum(hw6_dati$TRT=="Lev+5-FU" & hw6_dati$status==0), sum(hw6_dati$TRT=="Observation" & hw6_dati$status==0))

hw6_ni3=rbind(hw6_ni2, hw6_ni1)

hw6_out2=data.frame(Term=c("Number assessed","Number censored (%)","Number of recurrences (%)"), ARM1=c(hw6_ni0[1], sprintf("%i (%.1f%%)",hw6_ni3[,1],hw6_ni3[,1]/hw6_ni0[1]*100)), ARM2=c(hw6_ni0[2], sprintf("%i (%.1f%%)",hw6_ni3[,2],hw6_ni3[,2]/hw6_ni0[2]*100)), stringsAsFactors = F)


### Percentile of time to relapse
hw6_fiti3=survfit(Surv(time, status)~TRT, data=hw6_recurdata2, conf.type="log-log")

hw6_out3=NULL
hw6_qi=c(0.25,0.5,0.75)
i=3

for (i in 1:length(hw6_qi)) {
  hw6_temi=quantile(hw6_fiti3, hw6_qi[i])
  if (i<length(hw6_qi)) {
    hw6_temii=quantile(hw6_fiti3, hw6_qi[i+1])
    
    hw6_temi1=hw6_temii$quantile
    hw6_temi3=hw6_temi$upper
    for (i2 in 1:2) {
      if (!is.na(hw6_temi1[i2]) & is.na(hw6_temi3[i2])) {
        hw6_temi$upper[i2]=hw6_temi1[i2]
      }
    }
  }
  
  hw6_temi1=hw6_temi$quantile; hw6_temi1[is.na(hw6_temi1)]=""
  hw6_temi2=hw6_temi$lower; hw6_temi2[is.na(hw6_temi2)]="NE"
  hw6_temi3=hw6_temi$upper; hw6_temi3[is.na(hw6_temi3)]="NE"
  
  
  hw6_temi4=(sprintf("%s.0 (%s.0; %s.0)",hw6_temi1,hw6_temi2,hw6_temi3))
  hw6_temi4=gsub("NE.0","NE",hw6_temi4)
  hw6_temi4=gsub("NE; NE","NE",hw6_temi4)
  hw6_temi4[which(hw6_temi1=="")]="NE"
  hw6_temi4=rev(hw6_temi4)
  hw6_out3=rbind(hw6_out3,hw6_temi4)
}
hw6_out3=data.frame(Term=c("25% percentile (95% CI)","Median (95% CI)","75% percentile (95% CI)"), ARM1=hw6_out3[,2], ARM2=hw6_out3[,1], stringsAsFactors = F) 
## may need to adjust depending on order of ARM values in data

hw6_temi=sprintf("%.3f",hw6_plr)
hw6_temi=ifelse(hw6_temi=="0.000","<0.001","temi")
hw6_out4i=data.frame(Term="Two-sided P-value(c)",ARM1=hw6_temi,ARM2="",stringsAsFactors = F)

## HR
hw6_out4j=data.frame(Term="Hazard Ratio (95% CI)(b)",ARM1=sprintf("%.2f (%.2f; %.2f)",hw6_coefi[1],hw6_coefi[2],hw6_coefi[3]),ARM2="",stringsAsFactors = F)

hw6_out1=rbind(hw6_out2,"",hw6_out3,"",hw6_out4j,hw6_out4i)
hw6_out1=cbind(hw6_out1, iB=0, iI=1)
hw6_out1=rbind(data.frame(Term="Time to Recurrence (days)(a)",ARM1="",ARM2="",iB=0,iI=0,stringsAsFactors = F),hw6_out1)
colnames(hw6_out1)[c(2:3)]=c("Lev+5-FU","Observation")

colnames(hw6_out1)[-c(1,2,3)]=paste0(colnames(hw6_out1)[-c(1,2,3)],".delete")


## Observation first
hw6_out1=hw6_out1[,c(1,3,2,4,5)]


hw6_title1="<b>Table: Time to Recurrence and Number (%) of Subjects That Remained Recurrence Free</b>"

hw6_tfoot1="<br>(a) Based on Kaplan-Meier product limit estimates.
(b) Regression analysis of survival data based on Cox proportional hazards model with treatment as a factor.
(c) Log-rank test.
Note: NE stands for Not Estimable."

hw6_out1 %>% addHtmlTableStyle(align='c') %>% htmlTable(caption = hw6_title1,tfoot=hw6_tfoot1, align = 'c') 
Table: Time to Recurrence and Number (%) of Subjects That Remained Recurrence Free
Term Observation Lev+5-FU iB.delete iI.delete
1 Time to Recurrence (days)(a) 0 0
2 Number assessed 315 304 0 1
3 Number censored (%) 138 (43.8%) 185 (60.9%) 0 1
4 Number of recurrences (%) 177 (56.2%) 119 (39.1%) 0 1
5 0 1
6 25% percentile (95% CI) 308.0 (245.0; 398.0) 591.0 (449.0; 711.0) 0 1
7 Median (95% CI) 1236.0 (772.0; 2035.0) NE 0 1
8 75% percentile (95% CI) NE NE 0 1
9 0 1
10 Hazard Ratio (95% CI)(b) 1.67 (1.32; 2.11) 0 1
11 Two-sided P-value(c) <0.001 0 1

(a) Based on Kaplan-Meier product limit estimates.
(b) Regression analysis of survival data based on Cox proportional hazards model with treatment as a factor.
(c) Log-rank test.
Note: NE stands for Not Estimable.

Figure 2

hw6_myhaz<-ggsurvplot(hw6_recurfit,
                  fun = "cumhaz",          # to plot the cumulative hazard function
                  conf.int = TRUE,         # 95% CI of the survival function
                  linetype = "TRT",        # Change line type by groups
                  xlab = "Time in days",   # customize X axis label
                  break.time.by = 365,     # break X axis into 1-year time intervals
                  xlim = c(0, 3285),       # manually adjust the x axis range
                  title = "Cumulative Hazard Function Plot for Recurrence with 95% CI",
                  font.main = 13,          # customize the font sizes as needed
                  font.x = 13, 
                  font.y = 13,
                  font.tickslab = 11,
                  font.legend = 11,
                  legend.title = "TRT",
                  legend.labs = c("Lev+5-FU", "Observation"),
                  risk.table = TRUE,       # add number at risk table
                  risk.table.col = "TRT",  # risk table color by groups
                  fontsize = 3,            # risk table fontsize
                  tables.height = 0.21,    # height of the tables below the plot
                  ggtheme = theme_bw() + theme(plot.title=element_text(hjust=0.5))
                  ) 

#myhaz$data.survtable$time                        # look at the time point values
hw6_mytime<-list(hw6_myhaz$data.survtable$time[2:10])    # time values, skip time 0

#myhaz$data.survtable$n.event                     # look at the events data
hw6_mylev<-list(hw6_myhaz$data.survtable$n.event[2:10])  # n events for LEV5FU, skip time 0

# gather the information into a data frame for the LEF5FU group
hw6_mylevdat <- data.frame(
  x = hw6_mytime,
  leve = hw6_mylev)
names(hw6_mylevdat)[1]<-"x"
names(hw6_mylevdat)[2]<-"leve"


hw6_myobs<-list(hw6_myhaz$data.survtable$n.event[12:20]) # n events for observation, skip time 0
# make a separate data frame for the observation group
hw6_myobsdat <- data.frame(
  x = hw6_mytime,
  obse = hw6_myobs)
names(hw6_myobsdat)[1]<-"x"
names(hw6_myobsdat)[2]<-"obse"

hw6_myhaz$plot <- hw6_myhaz$plot+ 
  ggplot2::geom_text(data=hw6_mylevdat, mapping=aes(x=x, y=-0.10, label = leve),size=2) +
  ggplot2::geom_text(data=hw6_myobsdat, mapping=aes(x=x, y=-0.15, label = obse),size=2) +
  ggplot2::annotate("text", x=300, y=-0.05, label=c("Number of Events in the Preceding Interval"),size=2)+
  ggplot2::annotate("text", x=0, y=-0.10, label=c("Lev+5-FU"),size=2)+
  ggplot2::annotate("text", x=0, y=-0.15, label=c("Observation"),size=2)

hw6_myhaz