# Load all libraries and sources required to run the script
library(tidyverse)
library(ggthemes)
library(survival)
library(survminer)
library(gtsummary)
#library(rms)
# Default ggplot settings
# Nutrient treatment colors
Fill.colour<-c ("#4A6CAA", "#469B53", "#AA4A74")
ggthe_bw<-theme(plot.background=element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
legend.box.background = element_rect(),
panel.background =element_rect(fill = NA, color = "black")
)+
theme_bw()
Try survival package
# Data
Survival.data<-read.csv("Data/Acer_Mortality.csv", header = TRUE)
summary(Survival.data)
## Fragment Treatment Replicate Date Time_Point
## Ac_101 : 1 Ambient:39 R1:63 03/08/2018:37 T23 :37
## Ac_102 : 1 N :41 R2:57 02/05/2018:25 T14 :25
## Ac_103 : 1 N_P :40 03/05/2018:16 T22 :16
## Ac_104 : 1 02/19/2018:11 T18 :11
## Ac_105 : 1 03/01/2018: 9 T21 : 9
## Ac_106 : 1 02/15/2018: 8 T17 : 8
## (Other):114 (Other) :14 (Other):14
## Phase Genotype Gen_Treat Days_Experiment Days_Survivor
## Heat :89 G_07:26 Ac_48_Ambient:10 Min. : 65.00 Min. :179.0
## Nutrients:30 G_08: 8 Ac_62_N :10 1st Qu.: 87.25 1st Qu.:201.2
## Ramping : 1 G_31:16 Ac_62_N_P :10 Median :106.00 Median :220.0
## G_48:28 Ac_07_Ambient: 9 Mean : 99.90 Mean :213.9
## G_50:13 Ac_07_N : 9 3rd Qu.:113.00 3rd Qu.:227.0
## G_62:29 Ac_48_N : 9 Max. :113.00 Max. :227.0
## (Other) :63
## Fu.time_tot Fu.stat_tot Fu.time_texp Fu.stat_exp
## Min. :179.0 Min. :0.0000 Min. : 65.00 Min. :0.0000
## 1st Qu.:201.2 1st Qu.:0.0000 1st Qu.: 87.25 1st Qu.:0.0000
## Median :220.0 Median :0.0000 Median :106.00 Median :0.0000
## Mean :213.9 Mean :0.4833 Mean : 99.90 Mean :0.4833
## 3rd Qu.:227.0 3rd Qu.:1.0000 3rd Qu.:113.00 3rd Qu.:1.0000
## Max. :227.0 Max. :1.0000 Max. :113.00 Max. :1.0000
##
summary(Survival.data$Genotype)
## G_07 G_08 G_31 G_48 G_50 G_62
## 26 8 16 28 13 29
Survival.data$Genotype<-factor(Survival.data$Genotype,
levels=c("G_48", "G_62","G_31", "G_08","G_07", "G_50"))
#Survival.data$Treatment<-factor(Survival.data$Treatment,
# levels=c("N", "N_P","Ambient"))
summary(Survival.data$Treatment)
## Ambient N N_P
## 39 41 40
## Add survival object (Fit survival data using the Kaplan-Meier method)
surv_object <- Surv(time = Survival.data$Fu.time_texp, event = Survival.data$Fu.stat_exp)
surv_object
## [1] 113+ 113+ 113+ 82+ 113+ 113+ 113+ 113+ 82+ 113+ 113+ 82+ 113+ 113+ 113+
## [16] 113+ 113+ 113+ 82+ 113+ 113+ 113+ 113+ 113+ 113+ 82+ 113+ 113+ 113+ 113+
## [31] 113+ 113+ 82+ 113+ 113+ 113+ 113+ 113+ 82+ 89 96 92 82+ 92 71
## [46] 96 82+ 96 92 92 82+ 99 82+ 103 99 82+ 103 110 82+ 110
## [61] 110 110 82+ 110 106 106 92 76 71 71 110 110 106 110 82+
## [76] 103 106 82+ 110 106 99 96 96 82+ 92 92 82+ 96 96 99
## [91] 92 96 82+ 106 113+ 103 110 110 106 82+ 106 82+ 113+ 110 113+
## [106] 96 96 65 96 82+ 106 110 82+ 110 110 82+ 113+ 82+ 113+ 110
# Only treatment model
# Kaplan-Meier estimator. The "log-log" confidence interval is preferred.
fit1 <- survfit(surv_object ~ Treatment, data = Survival.data)
summary(fit1)
## Call: survfit(formula = surv_object ~ Treatment, data = Survival.data)
##
## Treatment=Ambient
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
##
## Treatment=N
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 71 41 3 0.927 0.0407 0.850 1.000
## 76 38 1 0.902 0.0463 0.816 0.998
## 89 28 1 0.870 0.0548 0.769 0.984
## 92 27 5 0.709 0.0789 0.570 0.882
## 96 22 3 0.612 0.0856 0.466 0.805
## 99 19 2 0.548 0.0879 0.400 0.750
## 103 17 3 0.451 0.0884 0.307 0.662
## 106 14 5 0.290 0.0810 0.168 0.502
## 110 9 9 0.000 NaN NA NA
##
## Treatment=N_P
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 65 40 1 0.975 0.0247 0.9278 1.000
## 92 30 3 0.877 0.0578 0.7712 0.999
## 96 27 8 0.617 0.0872 0.4682 0.814
## 99 19 2 0.552 0.0893 0.4025 0.758
## 103 17 1 0.520 0.0898 0.3707 0.729
## 106 16 4 0.390 0.0878 0.2509 0.606
## 110 12 7 0.163 0.0665 0.0729 0.362
sd1<-survdiff(surv_object~Treatment, data = Survival.data)
1 - pchisq(sd1$chisq, length(sd1$n) - 1)# pvalue
## [1] 3.774758e-15
#coxfit <- coxph(surv_object ~ Treatment, data = Survival.data, ties = 'exact')
#coxfit <- coxph(surv_object ~ Treatment, data = Survival.data, singular.ok = TRUE)
#summary(coxfit)
# Plot the survival model
Ac_Treatment_Only<-ggsurvplot(fit1, data = Survival.data, pval = TRUE,
conf.int = T, risk.table=T, palette=Fill.colour,
break.time.by=15, xlim=c(0,115), risk.table.y.text = FALSE,
risk.table.title="Number of fragments at risk") + ggtitle("Treatment (all genotypes)")
Ac_Treatment_Only
# Other plots
ggsurvplot(fit1, data = Survival.data, fun = "event")
ggsurvplot(fit1, data = Survival.data, fun = "cumhaz")
ggsurvplot(fit1, data = Survival.data, fun = "pct")
Model
# Treatment and genotype model 1
# Kaplan-Meier estimator. The "log-log" confidence interval is preferred.
fit2 <- survfit(surv_object ~ Genotype + Treatment, data = Survival.data)
#fit2 <- survfit(surv_object ~ Treatment+ Genotype, data = Survival.data)
summary(fit2)
## Call: survfit(formula = surv_object ~ Genotype + Treatment, data = Survival.data)
##
## Genotype=G_48, Treatment=Ambient
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
##
## Genotype=G_48, Treatment=N
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 106 7 2 0.714 0.171 0.447 1
## 110 5 5 0.000 NaN NA NA
##
## Genotype=G_48, Treatment=N_P
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 106 7 2 0.714 0.171 0.4471 1.000
## 110 5 3 0.286 0.171 0.0886 0.922
##
## Genotype=G_62, Treatment=Ambient
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
##
## Genotype=G_62, Treatment=N
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 103 8 1 0.875 0.117 0.673 1
## 106 7 3 0.500 0.177 0.250 1
## 110 4 4 0.000 NaN NA NA
##
## Genotype=G_62, Treatment=N_P
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 106 7 1 0.857 0.132 0.6334 1.000
## 110 6 4 0.286 0.171 0.0886 0.922
##
## Genotype=G_31, Treatment=Ambient
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
##
## Genotype=G_31, Treatment=N
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 99 4 2 0.5 0.25 0.188 1
## 103 2 2 0.0 NaN NA NA
##
## Genotype=G_31, Treatment=N_P
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 96 4 1 0.75 0.217 0.4259 1
## 103 3 1 0.50 0.250 0.1877 1
## 106 2 1 0.25 0.217 0.0458 1
##
## Genotype=G_08, Treatment=Ambient
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
##
## Genotype=G_08, Treatment=N
## time n.risk n.event survival std.err lower 95% CI
## 92 2 2 0 NaN NA
## upper 95% CI
## NA
##
## Genotype=G_08, Treatment=N_P
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 92 3 1 0.667 0.272 0.2995 1
## 96 2 1 0.333 0.272 0.0673 1
## 99 1 1 0.000 NaN NA NA
##
## Genotype=G_07, Treatment=Ambient
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
##
## Genotype=G_07, Treatment=N
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 71 9 1 0.889 0.105 0.706 1
## 89 6 1 0.741 0.161 0.484 1
## 92 5 2 0.444 0.189 0.193 1
## 96 3 3 0.000 NaN NA NA
##
## Genotype=G_07, Treatment=N_P
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 92 6 2 0.667 0.192 0.3786 1.000
## 96 4 3 0.167 0.152 0.0278 0.997
## 99 1 1 0.000 NaN NA NA
##
## Genotype=G_50, Treatment=Ambient
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
##
## Genotype=G_50, Treatment=N
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 71 4 2 0.50 0.250 0.1877 1
## 76 2 1 0.25 0.217 0.0458 1
## 92 1 1 0.00 NaN NA NA
##
## Genotype=G_50, Treatment=N_P
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 65 5 1 0.8 0.179 0.516 1
## 96 3 3 0.0 NaN NA NA
results<-summary(fit2, times = c(82, 91, 106, 110))
save.df <- as.data.frame(results[c("strata", "time", "n.risk", "n.event", "surv", "std.err")])
write.csv(save.df, file = "survival.csv")
sd2<-survdiff(surv_object~ Genotype + Treatment, data = Survival.data)
sd2
## Call:
## survdiff(formula = surv_object ~ Genotype + Treatment, data = Survival.data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Genotype=G_48, Treatment=Ambient 10 0 6.834 6.8344 9.2424
## Genotype=G_48, Treatment=N 9 7 5.387 0.4830 0.6234
## Genotype=G_48, Treatment=N_P 9 5 5.387 0.0278 0.0359
## Genotype=G_62, Treatment=Ambient 9 0 5.991 5.9907 7.9633
## Genotype=G_62, Treatment=N 10 8 5.482 1.1569 1.4770
## Genotype=G_62, Treatment=N_P 10 5 5.731 0.0932 0.1220
## Genotype=G_31, Treatment=Ambient 5 0 3.417 3.4172 4.3250
## Genotype=G_31, Treatment=N 6 4 1.550 3.8727 4.3931
## Genotype=G_31, Treatment=N_P 5 3 2.103 0.3821 0.4496
## Genotype=G_08, Treatment=Ambient 2 0 1.688 1.6875 2.0739
## Genotype=G_08, Treatment=N 3 2 0.328 8.5054 9.1447
## Genotype=G_08, Treatment=N_P 3 3 0.758 6.6279 7.3929
## Genotype=G_07, Treatment=Ambient 9 0 5.991 5.9907 7.9633
## Genotype=G_07, Treatment=N 9 7 1.294 25.1507 28.6643
## Genotype=G_07, Treatment=N_P 8 6 1.544 12.8649 14.7952
## Genotype=G_50, Treatment=Ambient 4 0 3.375 3.3750 4.2801
## Genotype=G_50, Treatment=N 4 4 0.252 55.6404 59.0694
## Genotype=G_50, Treatment=N_P 5 4 0.887 10.9177 12.3517
##
## Chisq= 189 on 17 degrees of freedom, p= <2e-16
1 - pchisq(sd2$chisq, length(sd2$n) - 1)# pvalue
## [1] 0
Cox hazards
coxfit2 <- coxph(surv_object ~ Genotype + Treatment, data = Survival.data)
# coxfit2 <- coxph(surv_object ~ Genotype + Treatment +
# strata(Treatment), data = Survival.data)
summary(coxfit2)
## Call:
## coxph(formula = surv_object ~ Genotype + Treatment, data = Survival.data)
##
## n= 120, number of events= 58
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GenotypeG_62 1.327e-01 1.142e+00 4.008e-01 0.331 0.74057
## GenotypeG_31 1.589e+00 4.900e+00 5.205e-01 3.054 0.00226 **
## GenotypeG_08 5.123e+00 1.678e+02 8.900e-01 5.756 8.62e-09 ***
## GenotypeG_07 5.001e+00 1.485e+02 8.218e-01 6.085 1.16e-09 ***
## GenotypeG_50 5.990e+00 3.994e+02 9.137e-01 6.556 5.52e-11 ***
## TreatmentN 2.581e+01 1.623e+11 4.383e+03 0.006 0.99530
## TreatmentN_P 2.460e+01 4.845e+10 4.383e+03 0.006 0.99552
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## GenotypeG_62 1.142e+00 8.757e-01 0.5206 2.505
## GenotypeG_31 4.900e+00 2.041e-01 1.7670 13.591
## GenotypeG_08 1.678e+02 5.961e-03 29.3195 959.981
## GenotypeG_07 1.485e+02 6.732e-03 29.6717 743.560
## GenotypeG_50 3.994e+02 2.504e-03 66.6410 2394.207
## TreatmentN 1.623e+11 6.162e-12 0.0000 Inf
## TreatmentN_P 4.845e+10 2.064e-11 0.0000 Inf
##
## Concordance= 0.948 (se = 0.012 )
## Likelihood ratio test= 181.7 on 7 df, p=<2e-16
## Wald test = 49.33 on 7 df, p=2e-08
## Score (logrank) test = 108.9 on 7 df, p=<2e-16
coxfit2
## Call:
## coxph(formula = surv_object ~ Genotype + Treatment, data = Survival.data)
##
## coef exp(coef) se(coef) z p
## GenotypeG_62 1.327e-01 1.142e+00 4.008e-01 0.331 0.74057
## GenotypeG_31 1.589e+00 4.900e+00 5.205e-01 3.054 0.00226
## GenotypeG_08 5.123e+00 1.678e+02 8.900e-01 5.756 8.62e-09
## GenotypeG_07 5.001e+00 1.485e+02 8.218e-01 6.085 1.16e-09
## GenotypeG_50 5.990e+00 3.994e+02 9.137e-01 6.556 5.52e-11
## TreatmentN 2.581e+01 1.623e+11 4.383e+03 0.006 0.99530
## TreatmentN_P 2.460e+01 4.845e+10 4.383e+03 0.006 0.99552
##
## Likelihood ratio test=181.7 on 7 df, p=< 2.2e-16
## n= 120, number of events= 58
ggadjustedcurves(coxfit2, data=Survival.data, variable = "Treatment")
“z” gives the Wald statistic value. It corresponds to the ratio of each regression coefficient to its standard error (z = coef/se(coef)). The wald statistic evaluates, whether the beta (β) coefficient of a given variable is statistically significantly different from 0.
The regression coefficients is the the sign of the regression coefficients (coef). A positive sign means that the hazard (risk of death) is higher, and thus the prognosis worse, for subjects with higher values of that variable. The variable sex is encoded as a numeric vector. 1: male, 2: female. The R summary for the Cox model gives the hazard ratio (HR) for the second group relative to the first group.
Hazard ratios. The exponentiated coefficients (exp(coef)), also known as hazard ratios, give the effect size of covariates. Confidence intervals of the hazard ratios. The summary output also gives upper and lower 95% confidence intervals for the hazard ratio (exp(coef))
Global statistical significance of the model. p-values for three alternative tests for overall significance of the model: The likelihood-ratio test, Wald test, and score logrank statistics. These three methods are asymptotically equivalent. For large enough N, they will give similar results. For small N, they may differ somewhat. The Likelihood ratio test has better behavior for small sample sizes, so it is generally preferred.
# Test for the proportional-hazards (PH) assumption
test.ph <- cox.zph(coxfit2)
test.ph
## chisq df p
## Genotype 8.20821 5 0.15
## Treatment 0.00639 2 1.00
## GLOBAL 8.39189 7 0.30
# the test is not statistically significant for each of the covariates, or the global test.
# Therefore, we can assume the proportional hazards.
ggcoxzph(test.ph)
# Systematic departures from a horizontal line are indicative of non-proportional hazard
# Testing influential observations
ggcoxdiagnostics(coxfit2, type = "dfbeta",
linear.predictions = FALSE, ggtheme = theme_bw())
#Positive values correspond to individuals that “died too soon” compared to expected survival times.
#Negative values correspond to individual that “lived too long”.
#Very large or small values are outliers, which are poorly predicted by the model.
ggcoxdiagnostics(coxfit2, type = "deviance",
linear.predictions = FALSE, ggtheme = theme_bw())
# Testing non linearity (for numeric variales)
#$ggcoxfunctional(Surv(time = Survival.data$Fu.time_texp,
# event = Survival.data$Fu.stat_exp) ~ Day + log(Day) + sqrt(Day),
# data = Survival.data)
Plor survival models
# Plot the survival model
GenTre_1<-ggsurvplot(fit2, data = Survival.data, pval = TRUE,
risk.table=T, tables.height=0.5)
#GenTre_1
Ac_facet_T<-ggsurvplot_facet(fit2, data = Survival.data, facet.by="Genotype",
#risk.table=T, tables.height=0.5,
conf.int = T,
nrow = 6, alpha=1,
palette=Fill.colour, linetype=1)+
geom_segment(aes(x = 0, y = 0, xend = 91, yend = 0), linetype="dashed", colour = "gray35")+
geom_segment(aes(x = 79, xend = 91, y = 0, yend = 0.5), colour = "gray35", linetype="dotted")+
geom_segment(aes(x = 91, xend = 113, y = 0.5, yend = 0.5), colour = "gray35", linetype="dotted")
Ac_facet_T
#ggsave("Outputs/Ac_facet_T.svg", Ac_facet_T, width=4, height=7,dpi = 300)
Ac_facet_G<-ggsurvplot_facet(fit2, data = Survival.data,
facet.by="Treatment",
conf.int = T,
# risk.table=T, tables.height=0.5,
nrow = 3, alpha=0.5,
linetype=1)+
geom_segment(aes(x = 0, y = 0, xend = 91, yend = 0), linetype="dashed", colour = "gray35")+
geom_segment(aes(x = 79, xend = 91, y = 0, yend = 0.5), colour = "gray35", linetype="dotted")+
geom_segment(aes(x = 91, xend = 113, y = 0.5, yend = 0.5), colour = "gray35", linetype="dotted")
Ac_facet_G
#ggsave("Outputs/Ac_facet_G.svg", Ac_facet_G, width=4, height=6,dpi = 300)
Corals in A did not experience any mortality, remove this data to compare the different genotypes under elevated nutrients.
Filter data
# Data
Survival.data2<-Survival.data[(Survival.data$Treatment!="Ambient"),]
summary(Survival.data2)
## Fragment Treatment Replicate Date Time_Point
## Ac_101 : 1 Ambient: 0 R1:42 02/05/2018:18 T14 :18
## Ac_103 : 1 N :41 R2:39 03/05/2018:16 T22 :16
## Ac_104 : 1 N_P :40 02/19/2018:11 T18 :11
## Ac_106 : 1 03/01/2018: 9 T21 : 9
## Ac_107 : 1 02/15/2018: 8 T17 : 8
## Ac_109 : 1 03/08/2018: 5 T23 : 5
## (Other):75 (Other) :14 (Other):14
## Phase Genotype Gen_Treat Days_Experiment Days_Survivor
## Heat :57 G_48:18 Ac_62_N :10 Min. : 65.00 Min. :179.0
## Nutrients:23 G_62:20 Ac_62_N_P:10 1st Qu.: 82.00 1st Qu.:196.0
## Ramping : 1 G_31:11 Ac_07_N : 9 Median : 96.00 Median :210.0
## G_08: 6 Ac_48_N : 9 Mean : 96.27 Mean :210.3
## G_07:17 Ac_48_N_P: 9 3rd Qu.:110.00 3rd Qu.:224.0
## G_50: 9 Ac_07_N_P: 8 Max. :113.00 Max. :227.0
## (Other) :26
## Fu.time_tot Fu.stat_tot Fu.time_texp Fu.stat_exp
## Min. :179.0 Min. :0.000 Min. : 65.00 Min. :0.000
## 1st Qu.:196.0 1st Qu.:0.000 1st Qu.: 82.00 1st Qu.:0.000
## Median :210.0 Median :1.000 Median : 96.00 Median :1.000
## Mean :210.3 Mean :0.716 Mean : 96.27 Mean :0.716
## 3rd Qu.:224.0 3rd Qu.:1.000 3rd Qu.:110.00 3rd Qu.:1.000
## Max. :227.0 Max. :1.000 Max. :113.00 Max. :1.000
##
summary(Survival.data2$Genotype)
## G_48 G_62 G_31 G_08 G_07 G_50
## 18 20 11 6 17 9
Survival.data2$Genotype<-factor(Survival.data2$Genotype,
levels=c("G_48", "G_62","G_31", "G_08","G_07", "G_50"))
summary(Survival.data2$Genotype)
## G_48 G_62 G_31 G_08 G_07 G_50
## 18 20 11 6 17 9
summary(Survival.data2$Treatment)
## Ambient N N_P
## 0 41 40
Survival.data2$Treatment<-factor(Survival.data2$Treatment,
levels=c("N", "N_P"))
## Add survival object (Fit survival data using the Kaplan-Meier method)
surv_object2 <- Surv(time = Survival.data2$Fu.time_texp,
event = Survival.data2$Fu.stat_exp)
surv_object2
## [1] 89 96 92 82+ 92 71 96 82+ 96 92 92 82+ 99 82+ 103
## [16] 99 82+ 103 110 82+ 110 110 110 82+ 110 106 106 92 76 71
## [31] 71 110 110 106 110 82+ 103 106 82+ 110 106 99 96 96 82+
## [46] 92 92 82+ 96 96 99 92 96 82+ 106 113+ 103 110 110 106
## [61] 82+ 106 82+ 113+ 110 113+ 96 96 65 96 82+ 106 110 82+ 110
## [76] 110 82+ 113+ 82+ 113+ 110
# Only genotype model
# Kaplan-Meier estimator. The "log-log" confidence interval is preferred.
fit3 <- survfit(surv_object2 ~ Genotype, data = Survival.data2)
summary(fit3)
## Call: survfit(formula = surv_object2 ~ Genotype, data = Survival.data2)
##
## Genotype=G_48
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 106 14 4 0.714 0.1207 0.5129 0.995
## 110 10 8 0.143 0.0935 0.0396 0.515
##
## Genotype=G_62
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 103 15 1 0.933 0.0644 0.8153 1.000
## 106 14 4 0.667 0.1217 0.4661 0.953
## 110 10 8 0.133 0.0878 0.0367 0.484
##
## Genotype=G_31
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 96 8 1 0.875 0.117 0.6734 1.000
## 99 7 2 0.625 0.171 0.3654 1.000
## 103 5 3 0.250 0.153 0.0753 0.830
## 106 2 1 0.125 0.117 0.0200 0.782
##
## Genotype=G_08
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 92 5 3 0.4 0.219 0.1367 1
## 96 2 1 0.2 0.179 0.0346 1
## 99 1 1 0.0 NaN NA NA
##
## Genotype=G_07
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 71 17 1 0.9412 0.0571 0.836 1.000
## 89 12 1 0.8627 0.0915 0.701 1.000
## 92 11 4 0.5490 0.1380 0.335 0.899
## 96 7 6 0.0784 0.0752 0.012 0.514
## 99 1 1 0.0000 NaN NA NA
##
## Genotype=G_50
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 65 9 1 0.889 0.105 0.706 1.000
## 71 8 2 0.667 0.157 0.420 1.000
## 76 6 1 0.556 0.166 0.310 0.997
## 92 4 1 0.417 0.173 0.185 0.940
## 96 3 3 0.000 NaN NA NA
summary(fit3)$table
## records n.max n.start events *rmean *se(rmean) median 0.95LCL
## Genotype=G_48 18 18 18 12 109.28571 0.6180002 110 110
## Genotype=G_62 20 20 20 13 108.86667 0.7046933 110 106
## Genotype=G_31 11 11 11 7 102.75000 1.7207375 103 99
## Genotype=G_08 6 6 6 5 94.20000 1.2774976 92 92
## Genotype=G_07 17 17 17 13 92.96078 1.5226638 96 92
## Genotype=G_50 9 9 9 8 84.22222 4.1370138 92 71
## 0.95UCL
## Genotype=G_48 NA
## Genotype=G_62 110
## Genotype=G_31 NA
## Genotype=G_08 NA
## Genotype=G_07 NA
## Genotype=G_50 NA
surv_pvalue(fit3)
res.sum <- surv_summary(fit3)
attr(res.sum, "table")
sd3<-survdiff(surv_object2 ~ Genotype, data = Survival.data2)
sd3
## Call:
## survdiff(formula = surv_object2 ~ Genotype, data = Survival.data2)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Genotype=G_48 18 12 21.42 4.1426 10.4702
## Genotype=G_62 20 13 22.15 3.7802 9.6026
## Genotype=G_31 11 7 6.43 0.0496 0.0737
## Genotype=G_08 6 5 1.72 6.2608 7.5942
## Genotype=G_07 17 13 4.48 16.1838 22.4284
## Genotype=G_50 9 8 1.79 21.4898 26.4752
##
## Chisq= 76.7 on 5 degrees of freedom, p= 4e-15
1 - pchisq(sd3$chisq, length(sd3$n) - 1)# pvalue
## [1] 4.107825e-15
results3<-summary(fit3, times = c(82, 91, 106, 110))
save.df <- as.data.frame(results3[c("strata", "time", "n.risk", "n.event", "surv", "std.err")])
write.csv(save.df, file = "survival3.csv")
Plot survival model
# Plot the survival model
Genotype_only2<-ggsurvplot(fit3, data = Survival.data2, pval = TRUE,
risk.table=T,
#risk.table = "abs_pct", # absolute number and percentage at risk.
tables.height=0.4, conf.int = T, n.risk = TRUE,
#risk.table.y.text = FALSE,
#surv.median.line = "hv", # Specify median survival
break.time.by=15, xlim=c(0,115),
xlab="Days in the experiment",
#ncensor.plot = TRUE
risk.table.title="Number of A. cervicornis at risk",
#risk.table.y.text = FALSE,
risk.table.y.text.col = TRUE)
Genotype_only2
#Genotype_only2$ncensor.plot
Acer.Nut.Probabilities<-Genotype_only2$data.survplot
Acer.Nut.Probabilities
#ggsave("Outputs/Fig_2_Surv_Genotype.svg",
# Genotype_only2$plot, width=4.5, height=3.5,dpi = 300)
#ggsave("Outputs/Fig_2_Surv_Genotype.pdf", print(Genotype_only2),
# width=4.5, height=4.5,dpi = 300)
# Other plots
ggsurvplot(fit3, data = Survival.data2, conf.int = TRUE, fun = "event")
ggsurvplot(fit3, data = Survival.data2, conf.int = TRUE, fun = "cumhaz")
ggsurvplot(fit3, data = Survival.data2, conf.int = TRUE, fun = "pct")
coxfit3 <- coxph(surv_object2 ~ Genotype, data = Survival.data2)
summary(coxfit3)
## Call:
## coxph(formula = surv_object2 ~ Genotype, data = Survival.data2)
##
## n= 81, number of events= 58
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GenotypeG_62 0.07009 1.07260 0.40034 0.175 0.8610
## GenotypeG_31 1.08998 2.97422 0.48474 2.249 0.0245 *
## GenotypeG_08 4.16708 64.52701 0.81062 5.141 2.74e-07 ***
## GenotypeG_07 4.33373 76.22844 0.75356 5.751 8.87e-09 ***
## GenotypeG_50 4.91484 136.29688 0.80987 6.069 1.29e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## GenotypeG_62 1.073 0.932310 0.4894 2.351
## GenotypeG_31 2.974 0.336222 1.1502 7.691
## GenotypeG_08 64.527 0.015497 13.1746 316.043
## GenotypeG_07 76.228 0.013118 17.4053 333.850
## GenotypeG_50 136.297 0.007337 27.8689 666.579
##
## Concordance= 0.854 (se = 0.022 )
## Likelihood ratio test= 75.2 on 5 df, p=8e-15
## Wald test = 42.11 on 5 df, p=6e-08
## Score (logrank) test = 87.02 on 5 df, p=<2e-16
coxfit3
## Call:
## coxph(formula = surv_object2 ~ Genotype, data = Survival.data2)
##
## coef exp(coef) se(coef) z p
## GenotypeG_62 0.07009 1.07260 0.40034 0.175 0.8610
## GenotypeG_31 1.08998 2.97422 0.48474 2.249 0.0245
## GenotypeG_08 4.16708 64.52701 0.81062 5.141 2.74e-07
## GenotypeG_07 4.33373 76.22844 0.75356 5.751 8.87e-09
## GenotypeG_50 4.91484 136.29688 0.80987 6.069 1.29e-09
##
## Likelihood ratio test=75.2 on 5 df, p=8.446e-15
## n= 81, number of events= 58
ggadjustedcurves(coxfit3, data=Survival.data2, variable = "Genotype")
coxfit3 %>%
gtsummary::tbl_regression(exp = TRUE)
| Characteristic | HR1 | 95% CI1 | p-value |
|---|---|---|---|
| Genotype | |||
| G_48 | — | — | |
| G_62 | 1.07 | 0.49, 2.35 | 0.9 |
| G_31 | 2.97 | 1.15, 7.69 | 0.025 |
| G_08 | 64.5 | 13.2, 316 | <0.001 |
| G_07 | 76.2 | 17.4, 334 | <0.001 |
| G_50 | 136 | 27.9, 667 | <0.001 |
|
1
HR = Hazard Ratio, CI = Confidence Interval
|
|||
# Test for the proportional-hazards (PH) assumption
test.ph <- cox.zph(coxfit3)
test.ph
## chisq df p
## Genotype 9.86 5 0.079
## GLOBAL 9.86 5 0.079
# the test is not statistically significant for each of the covariates, or the global test.
# Therefore, we can assume the proportional hazards.
ggcoxzph(test.ph)
# Systematic departures from a horizontal line are indicative of non-proportional hazard
# Testing influential observations
ggcoxdiagnostics(coxfit3, type = "dfbeta",
linear.predictions = FALSE, ggtheme = theme_bw())
#Positive values correspond to individuals that “died too soon” compared to expected survival times.
#Negative values correspond to individual that “lived too long”.
#Very large or small values are outliers, which are poorly predicted by the model.
ggcoxdiagnostics(coxfit3, type = "deviance",
linear.predictions = FALSE, ggtheme = theme_bw())
# Testing non linearity (for numeric variales)
#$ggcoxfunctional(Surv(time = Survival.data$Fu.time_texp,
# event = Survival.data$Fu.stat_exp) ~ Day + log(Day) + sqrt(Day),
# data = Survival.data)
Plot Hazard ratio
HazardRatio3<-ggforest(coxfit3, data = Survival.data2)
HazardRatio3
#ggsave("Outputs/Fig_2b_HazardRatio.svg", HazardRatio3, width=5, height=4,dpi = 300)
Non-significant
# Only treatment model
# Kaplan-Meier estimator. The "log-log" confidence interval is preferred.
fit4 <- survfit(surv_object2 ~ Treatment, data = Survival.data2)
summary(fit4)
## Call: survfit(formula = surv_object2 ~ Treatment, data = Survival.data2)
##
## Treatment=N
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 71 41 3 0.927 0.0407 0.850 1.000
## 76 38 1 0.902 0.0463 0.816 0.998
## 89 28 1 0.870 0.0548 0.769 0.984
## 92 27 5 0.709 0.0789 0.570 0.882
## 96 22 3 0.612 0.0856 0.466 0.805
## 99 19 2 0.548 0.0879 0.400 0.750
## 103 17 3 0.451 0.0884 0.307 0.662
## 106 14 5 0.290 0.0810 0.168 0.502
## 110 9 9 0.000 NaN NA NA
##
## Treatment=N_P
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 65 40 1 0.975 0.0247 0.9278 1.000
## 92 30 3 0.877 0.0578 0.7712 0.999
## 96 27 8 0.617 0.0872 0.4682 0.814
## 99 19 2 0.552 0.0893 0.4025 0.758
## 103 17 1 0.520 0.0898 0.3707 0.729
## 106 16 4 0.390 0.0878 0.2509 0.606
## 110 12 7 0.163 0.0665 0.0729 0.362
surv_pvalue(fit4)
survdiff(surv_object2~Treatment, data = Survival.data2)
## Call:
## survdiff(formula = surv_object2 ~ Treatment, data = Survival.data2)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Treatment=N 41 32 26.8 1.007 2.75
## Treatment=N_P 40 26 31.2 0.865 2.75
##
## Chisq= 2.8 on 1 degrees of freedom, p= 0.1
coxfit4 <- coxph(surv_object2 ~ Treatment, data = Survival.data2)
summary(coxfit4)
## Call:
## coxph(formula = surv_object2 ~ Treatment, data = Survival.data2)
##
## n= 81, number of events= 58
##
## coef exp(coef) se(coef) z Pr(>|z|)
## TreatmentN_P -0.4973 0.6082 0.2691 -1.848 0.0646 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## TreatmentN_P 0.6082 1.644 0.3589 1.031
##
## Concordance= 0.562 (se = 0.041 )
## Likelihood ratio test= 3.45 on 1 df, p=0.06
## Wald test = 3.41 on 1 df, p=0.06
## Score (logrank) test = 3.48 on 1 df, p=0.06
# Plot the survival model
Treatment_Only<-ggsurvplot(fit4, data = Survival.data2, pval = TRUE,
conf.int = T, risk.table=T, palette=Fill.colour)
Treatment_Only
# Plot the Hazard ratio
fit.coxph4 <- coxph(surv_object2 ~ Treatment, data = Survival.data2)
ggforest(fit.coxph4, data = Survival.data2)
fit.coxph4
## Call:
## coxph(formula = surv_object2 ~ Treatment, data = Survival.data2)
##
## coef exp(coef) se(coef) z p
## TreatmentN_P -0.4973 0.6082 0.2691 -1.848 0.0646
##
## Likelihood ratio test=3.45 on 1 df, p=0.06338
## n= 81, number of events= 58
# Treatment and genotype model 1
# Kaplan-Meier estimator. The "log-log" confidence interval is preferred.
fit5 <- survfit(surv_object2 ~ Treatment + Genotype, data = Survival.data2)
summary(fit5)
## Call: survfit(formula = surv_object2 ~ Treatment + Genotype, data = Survival.data2)
##
## Treatment=N, Genotype=G_48
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 106 7 2 0.714 0.171 0.447 1
## 110 5 5 0.000 NaN NA NA
##
## Treatment=N, Genotype=G_62
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 103 8 1 0.875 0.117 0.673 1
## 106 7 3 0.500 0.177 0.250 1
## 110 4 4 0.000 NaN NA NA
##
## Treatment=N, Genotype=G_31
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 99 4 2 0.5 0.25 0.188 1
## 103 2 2 0.0 NaN NA NA
##
## Treatment=N, Genotype=G_08
## time n.risk n.event survival std.err lower 95% CI
## 92 2 2 0 NaN NA
## upper 95% CI
## NA
##
## Treatment=N, Genotype=G_07
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 71 9 1 0.889 0.105 0.706 1
## 89 6 1 0.741 0.161 0.484 1
## 92 5 2 0.444 0.189 0.193 1
## 96 3 3 0.000 NaN NA NA
##
## Treatment=N, Genotype=G_50
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 71 4 2 0.50 0.250 0.1877 1
## 76 2 1 0.25 0.217 0.0458 1
## 92 1 1 0.00 NaN NA NA
##
## Treatment=N_P, Genotype=G_48
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 106 7 2 0.714 0.171 0.4471 1.000
## 110 5 3 0.286 0.171 0.0886 0.922
##
## Treatment=N_P, Genotype=G_62
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 106 7 1 0.857 0.132 0.6334 1.000
## 110 6 4 0.286 0.171 0.0886 0.922
##
## Treatment=N_P, Genotype=G_31
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 96 4 1 0.75 0.217 0.4259 1
## 103 3 1 0.50 0.250 0.1877 1
## 106 2 1 0.25 0.217 0.0458 1
##
## Treatment=N_P, Genotype=G_08
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 92 3 1 0.667 0.272 0.2995 1
## 96 2 1 0.333 0.272 0.0673 1
## 99 1 1 0.000 NaN NA NA
##
## Treatment=N_P, Genotype=G_07
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 92 6 2 0.667 0.192 0.3786 1.000
## 96 4 3 0.167 0.152 0.0278 0.997
## 99 1 1 0.000 NaN NA NA
##
## Treatment=N_P, Genotype=G_50
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 65 5 1 0.8 0.179 0.516 1
## 96 3 3 0.0 NaN NA NA
surv_pvalue(fit5)
coxfit5 <- coxph(surv_object2 ~ Genotype + Treatment, data = Survival.data2)
summary(coxfit5)
## Call:
## coxph(formula = surv_object2 ~ Genotype + Treatment, data = Survival.data2)
##
## n= 81, number of events= 58
##
## coef exp(coef) se(coef) z Pr(>|z|)
## GenotypeG_62 0.1327 1.1419 0.4008 0.331 0.74057
## GenotypeG_31 1.5893 4.9004 0.5205 3.054 0.00226 **
## GenotypeG_08 5.1226 167.7681 0.8900 5.756 8.62e-09 ***
## GenotypeG_07 5.0008 148.5352 0.8218 6.085 1.16e-09 ***
## GenotypeG_50 5.9901 399.4400 0.9137 6.556 5.52e-11 ***
## TreatmentN_P -1.2087 0.2986 0.3086 -3.916 9.00e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## GenotypeG_62 1.1419 0.875728 0.5206 2.5048
## GenotypeG_31 4.9004 0.204064 1.7670 13.5907
## GenotypeG_08 167.7681 0.005961 29.3195 959.9812
## GenotypeG_07 148.5352 0.006732 29.6717 743.5602
## GenotypeG_50 399.4400 0.002504 66.6410 2394.2073
## TreatmentN_P 0.2986 3.349201 0.1631 0.5467
##
## Concordance= 0.893 (se = 0.02 )
## Likelihood ratio test= 91.35 on 6 df, p=<2e-16
## Wald test = 49.33 on 6 df, p=6e-09
## Score (logrank) test = 96.48 on 6 df, p=<2e-16
coxfit5
## Call:
## coxph(formula = surv_object2 ~ Genotype + Treatment, data = Survival.data2)
##
## coef exp(coef) se(coef) z p
## GenotypeG_62 0.1327 1.1419 0.4008 0.331 0.74057
## GenotypeG_31 1.5893 4.9004 0.5205 3.054 0.00226
## GenotypeG_08 5.1226 167.7681 0.8900 5.756 8.62e-09
## GenotypeG_07 5.0008 148.5352 0.8218 6.085 1.16e-09
## GenotypeG_50 5.9901 399.4400 0.9137 6.556 5.52e-11
## TreatmentN_P -1.2087 0.2986 0.3086 -3.916 9.00e-05
##
## Likelihood ratio test=91.35 on 6 df, p=< 2.2e-16
## n= 81, number of events= 58
# Test for the proportional-hazards (PH) assumption
test.ph <- cox.zph(coxfit5)
test.ph
## chisq df p
## Genotype 8.20112 5 0.15
## Treatment 0.00635 1 0.94
## GLOBAL 8.38567 6 0.21
# the test is not statistically significant for each of the covariates, or the global test.
# Therefore, we can assume the proportional hazards.
ggcoxzph(test.ph)
# Systematic departures from a horizontal line are indicative of non-proportional hazard
# Testing influential observations
ggcoxdiagnostics(coxfit5, type = "dfbeta",
linear.predictions = FALSE, ggtheme = theme_bw())
#Positive values correspond to individuals that “died too soon” compared to expected survival times.
#Negative values correspond to individual that “lived too long”.
#Very large or small values are outliers, which are poorly predicted by the model.
ggcoxdiagnostics(coxfit2, type = "deviance",
linear.predictions = FALSE, ggtheme = theme_bw())
# Testing non linearity (for numeric variales)
#$ggcoxfunctional(Surv(time = Survival.data$Fu.time_texp,
# event = Survival.data$Fu.stat_exp) ~ Day + log(Day) + sqrt(Day),
# data = Survival.data)
Plot the model
# Plot the survival model
GenTre_5<-ggsurvplot(fit5, data = Survival.data2, pval = TRUE, conf.int = T,
risk.table=T, tables.height=0.5)
#GenTre_5
ggsurvplot_facet(fit5, data = Survival.data2,
facet.by="Genotype", risk.table=F, conf.int = T,
tables.height=0.5, nrow = 6, alpha=1,
palette=Fill.colour, linetype=1)+
annotate("segment", x = 2, xend = 91, y = -1.5, yend = -1.5,
colour = "gray35", linetype="dashed")+
annotate("segment", x = 79, xend = 91, y = -1.4, yend = 4.5,
colour = "gray35", linetype="dotted")+
annotate("segment", x = 91, xend = 110, y = 4.5, yend = 4.5,
colour = "gray35", linetype="dotted")
ggsurvplot_facet(fit5, data = Survival.data2,
facet.by="Treatment", risk.table=T, conf.int = T,
tables.height=0.5, nrow = 3, alpha=0.5,
linetype=1)
Plot the hazard ratios
ggforest(coxfit5, data = Survival.data2)