# set up
library("sas7bdat")
library("survival")
library("survminer")
library("htmlTable") # for the table
# read data
hw6_dat1 <- read.sas7bdat("data/Assign6.sas7bdat")
## Some Data Wrangling - Create a descriptive categorical variable for treatment group:
hw6_dat1$TRT <- with(hw6_dat1, ifelse(rx==1, "Observation", ifelse(rx==2, "Lev", "Lev+5-FU")))
hw6_recurdata <- data.frame(subset(hw6_dat1, etype==1))
hw6_deathdata <- data.frame(subset(hw6_dat1, etype==2))
hw6_recurdata2 <- data.frame(subset(hw6_recurdata, hw6_recurdata$TRT=="Observation" | hw6_recurdata$TRT=="Lev+5-FU"))
hw6_recurfit <- survfit(Surv(time, status) ~ TRT, conf.int = 0.95, data = hw6_recurdata2)
# compute Kaplan-Meier survival estimates with survfit and Surv functions
# print(recurfit)
# the object recurfit contains the data needed to create a survival curve
# use this in ggsurvplot function
# save results as an object mysurv to make updates later
hw6_mysurv<-ggsurvplot(hw6_recurfit,
pval = FALSE, # do not print p-value of the Log-Rank test
pval.method = FALSE, # do not print name of the test
conf.int = TRUE, # 95% confidence limits of the survival time
linetype = "TRT", # assign line type by groups
surv.median.line = "hv", # h=horizontal v=vertical lines for the median survival time
xlab = "Time in days", # customize X axis label
ylab="Proportion of Subjects without \n a Recurrence", # customize Y axis label
break.time.by = 365, # break X axis into 1-year time intervals
xlim = c(0, 3285), # manually adjust x axis range
title = "Survival Plot of Time to Recurrence by Treatment Group with 95% CI",
legend.title = "TRT",
legend.labs = c("Lev+5-FU", "Observation"),
font.main = 13, # customize the font sizes as needed
font.x = 13,
font.y = 13,
font.tickslab = 11,
font.legend = 11,
risk.table = TRUE, # add number at risk table
risk.table.col = "TRT", # risk table color by groups
fontsize = 3, # risk table fontsize
tables.height = 0.21, # height of the tables below the plot
cumevents = TRUE, # add cumulative number of events table
cumevents.col = "TRT", # cumulative events table color by groups
ggtheme = theme_bw()
)
hw6_myr2.cox <- coxph(Surv(time, status) ~ TRT, data = hw6_recurdata2)
hw6_sumres2 <- summary(hw6_myr2.cox)
hw6_myhr <- round(hw6_sumres2$coefficients[2], 2) # the hazard ratio
hw6_rpval <- round(hw6_sumres2$coefficients[5], 7) # the p-value
hw6_mypval<- ifelse(hw6_rpval<0.001, "<0.001", as.factor(hw6_rpval)) # round p-value if very small
hw6_mylower <- round(hw6_sumres2$conf.int[3], 2) # LL of the CI
hw6_myupper <- round(hw6_sumres2$conf.int[4], 2) # UL of the CI
hw6_myci <- paste("(", hw6_mylower, ",", hw6_myupper, ")") # create label for the CI
hw6_mysurv$plot <- hw6_mysurv$plot+
ggplot2::annotate("text", size=3,
x = 2800, y = 0.15, # position of the text on the graph
label=paste("Hazard Ratio:",hw6_myhr,"\n 95% CI: ",hw6_myci, "\n p-value:", hw6_mypval))
hw6_mysurv
## create the table
hw6_fiti2=survdiff(Surv(time, status)~TRT, data=hw6_recurdata2)
hw6_plr=1 - pchisq(hw6_fiti2$chisq, length(hw6_fiti2$n) - 1); # plr
hw6_fiti=coxph(Surv(time, status) ~ TRT, data = hw6_recurdata2)
hw6_coefi=summary(hw6_fiti)$conf.int[c(1,3,4)]; #coefi
### Output ###
hw6_dati=hw6_recurdata2
hw6_ni0=c(sum(hw6_dati$TRT=="Lev+5-FU"), sum(hw6_dati$TRT=="Observation"))
hw6_ni1=c(sum(hw6_dati$TRT=="Lev+5-FU" & hw6_dati$status==1), sum(hw6_dati$TRT=="Observation" & hw6_dati$status==1))
hw6_ni2=c(sum(hw6_dati$TRT=="Lev+5-FU" & hw6_dati$status==0), sum(hw6_dati$TRT=="Observation" & hw6_dati$status==0))
hw6_ni3=rbind(hw6_ni2, hw6_ni1)
hw6_out2=data.frame(Term=c("Number assessed","Number censored (%)","Number of recurrences (%)"), ARM1=c(hw6_ni0[1], sprintf("%i (%.1f%%)",hw6_ni3[,1],hw6_ni3[,1]/hw6_ni0[1]*100)), ARM2=c(hw6_ni0[2], sprintf("%i (%.1f%%)",hw6_ni3[,2],hw6_ni3[,2]/hw6_ni0[2]*100)), stringsAsFactors = F)
### Percentile of time to relapse
hw6_fiti3=survfit(Surv(time, status)~TRT, data=hw6_recurdata2, conf.type="log-log")
hw6_out3=NULL
hw6_qi=c(0.25,0.5,0.75)
i=3
for (i in 1:length(hw6_qi)) {
hw6_temi=quantile(hw6_fiti3, hw6_qi[i])
if (i<length(hw6_qi)) {
hw6_temii=quantile(hw6_fiti3, hw6_qi[i+1])
hw6_temi1=hw6_temii$quantile
hw6_temi3=hw6_temi$upper
for (i2 in 1:2) {
if (!is.na(hw6_temi1[i2]) & is.na(hw6_temi3[i2])) {
hw6_temi$upper[i2]=hw6_temi1[i2]
}
}
}
hw6_temi1=hw6_temi$quantile; hw6_temi1[is.na(hw6_temi1)]=""
hw6_temi2=hw6_temi$lower; hw6_temi2[is.na(hw6_temi2)]="NE"
hw6_temi3=hw6_temi$upper; hw6_temi3[is.na(hw6_temi3)]="NE"
hw6_temi4=(sprintf("%s.0 (%s.0; %s.0)",hw6_temi1,hw6_temi2,hw6_temi3))
hw6_temi4=gsub("NE.0","NE",hw6_temi4)
hw6_temi4=gsub("NE; NE","NE",hw6_temi4)
hw6_temi4[which(hw6_temi1=="")]="NE"
hw6_temi4=rev(hw6_temi4)
hw6_out3=rbind(hw6_out3,hw6_temi4)
}
hw6_out3=data.frame(Term=c("25% percentile (95% CI)","Median (95% CI)","75% percentile (95% CI)"), ARM1=hw6_out3[,2], ARM2=hw6_out3[,1], stringsAsFactors = F)
## may need to adjust depending on order of ARM values in data
hw6_temi=sprintf("%.3f",hw6_plr)
hw6_temi=ifelse(hw6_temi=="0.000","<0.001","temi")
hw6_out4i=data.frame(Term="Two-sided P-value(c)",ARM1=hw6_temi,ARM2="",stringsAsFactors = F)
## HR
hw6_out4j=data.frame(Term="Hazard Ratio (95% CI)(b)",ARM1=sprintf("%.2f (%.2f; %.2f)",hw6_coefi[1],hw6_coefi[2],hw6_coefi[3]),ARM2="",stringsAsFactors = F)
hw6_out1=rbind(hw6_out2,"",hw6_out3,"",hw6_out4j,hw6_out4i)
hw6_out1=cbind(hw6_out1, iB=0, iI=1)
hw6_out1=rbind(data.frame(Term="Time to Recurrence (days)(a)",ARM1="",ARM2="",iB=0,iI=0,stringsAsFactors = F),hw6_out1)
colnames(hw6_out1)[c(2:3)]=c("Lev+5-FU","Observation")
colnames(hw6_out1)[-c(1,2,3)]=paste0(colnames(hw6_out1)[-c(1,2,3)],".delete")
## Observation first
hw6_out1=hw6_out1[,c(1,3,2,4,5)]
hw6_title1="<b>Table: Time to Recurrence and Number (%) of Subjects That Remained Recurrence Free</b>"
hw6_tfoot1="<br>(a) Based on Kaplan-Meier product limit estimates.
(b) Regression analysis of survival data based on Cox proportional hazards model with treatment as a factor.
(c) Log-rank test.
Note: NE stands for Not Estimable."
hw6_out1 %>% addHtmlTableStyle(align='c') %>% htmlTable(caption = hw6_title1,tfoot=hw6_tfoot1, align = 'c')
| Table: Time to Recurrence and Number (%) of Subjects That Remained Recurrence Free | |||||
| Term | Observation | Lev+5-FU | iB.delete | iI.delete | |
|---|---|---|---|---|---|
| 1 | Time to Recurrence (days)(a) | 0 | 0 | ||
| 2 | Number assessed | 315 | 304 | 0 | 1 |
| 3 | Number censored (%) | 138 (43.8%) | 185 (60.9%) | 0 | 1 |
| 4 | Number of recurrences (%) | 177 (56.2%) | 119 (39.1%) | 0 | 1 |
| 5 | 0 | 1 | |||
| 6 | 25% percentile (95% CI) | 308.0 (245.0; 398.0) | 591.0 (449.0; 711.0) | 0 | 1 |
| 7 | Median (95% CI) | 1236.0 (772.0; 2035.0) | NE | 0 | 1 |
| 8 | 75% percentile (95% CI) | NE | NE | 0 | 1 |
| 9 | 0 | 1 | |||
| 10 | Hazard Ratio (95% CI)(b) | 1.67 (1.32; 2.11) | 0 | 1 | |
| 11 | Two-sided P-value(c) | <0.001 | 0 | 1 | |
|
(a) Based on Kaplan-Meier product limit estimates. (b) Regression analysis of survival data based on Cox proportional hazards model with treatment as a factor. (c) Log-rank test. Note: NE stands for Not Estimable. |
|||||
hw6_myhaz<-ggsurvplot(hw6_recurfit,
fun = "cumhaz", # to plot the cumulative hazard function
conf.int = TRUE, # 95% CI of the survival function
linetype = "TRT", # Change line type by groups
xlab = "Time in days", # customize X axis label
break.time.by = 365, # break X axis into 1-year time intervals
xlim = c(0, 3285), # manually adjust the x axis range
title = "Cumulative Hazard Function Plot for Recurrence with 95% CI",
font.main = 13, # customize the font sizes as needed
font.x = 13,
font.y = 13,
font.tickslab = 11,
font.legend = 11,
legend.title = "TRT",
legend.labs = c("Lev+5-FU", "Observation"),
risk.table = TRUE, # add number at risk table
risk.table.col = "TRT", # risk table color by groups
fontsize = 3, # risk table fontsize
tables.height = 0.21, # height of the tables below the plot
ggtheme = theme_bw() + theme(plot.title=element_text(hjust=0.5))
)
#myhaz$data.survtable$time # look at the time point values
hw6_mytime<-list(hw6_myhaz$data.survtable$time[2:10]) # time values, skip time 0
#myhaz$data.survtable$n.event # look at the events data
hw6_mylev<-list(hw6_myhaz$data.survtable$n.event[2:10]) # n events for LEV5FU, skip time 0
# gather the information into a data frame for the LEF5FU group
hw6_mylevdat <- data.frame(
x = hw6_mytime,
leve = hw6_mylev)
names(hw6_mylevdat)[1]<-"x"
names(hw6_mylevdat)[2]<-"leve"
hw6_myobs<-list(hw6_myhaz$data.survtable$n.event[12:20]) # n events for observation, skip time 0
# make a separate data frame for the observation group
hw6_myobsdat <- data.frame(
x = hw6_mytime,
obse = hw6_myobs)
names(hw6_myobsdat)[1]<-"x"
names(hw6_myobsdat)[2]<-"obse"
hw6_myhaz$plot <- hw6_myhaz$plot+
ggplot2::geom_text(data=hw6_mylevdat, mapping=aes(x=x, y=-0.10, label = leve),size=2) +
ggplot2::geom_text(data=hw6_myobsdat, mapping=aes(x=x, y=-0.15, label = obse),size=2) +
ggplot2::annotate("text", x=300, y=-0.05, label=c("Number of Events in the Preceding Interval"),size=2)+
ggplot2::annotate("text", x=0, y=-0.10, label=c("Lev+5-FU"),size=2)+
ggplot2::annotate("text", x=0, y=-0.15, label=c("Observation"),size=2)
hw6_myhaz