General project set-up

# Load all libraries and sources required to run the script
    library(tidyverse)
    library(ggthemes)
    library(survival)
    library(survminer)
    library(gtsummary)
    #library(rms)
   
# Default ggplot settings
  # Nutrient treatment colors
  Fill.colour<-c ("#4A6CAA", "#469B53", "#AA4A74")
  
  ggthe_bw<-theme(plot.background=element_blank(),
          panel.grid.major.y = element_blank(),
          panel.grid.major.x = element_blank(),
          panel.grid.minor.x = element_blank(),
          panel.grid.minor.y = element_blank(),
          legend.box.background = element_rect(),
          panel.background =element_rect(fill = NA, color = "black")
          )+
    theme_bw()

Try survival package

# Data
    Survival.data<-read.csv("Data/Acer_Mortality.csv", header = TRUE)
    summary(Survival.data)
##     Fragment     Treatment  Replicate         Date      Time_Point
##  Ac_101 :  1   Ambient:39   R1:63     03/08/2018:37   T23    :37  
##  Ac_102 :  1   N      :41   R2:57     02/05/2018:25   T14    :25  
##  Ac_103 :  1   N_P    :40             03/05/2018:16   T22    :16  
##  Ac_104 :  1                          02/19/2018:11   T18    :11  
##  Ac_105 :  1                          03/01/2018: 9   T21    : 9  
##  Ac_106 :  1                          02/15/2018: 8   T17    : 8  
##  (Other):114                          (Other)   :14   (Other):14  
##        Phase    Genotype          Gen_Treat  Days_Experiment  Days_Survivor  
##  Heat     :89   G_07:26   Ac_48_Ambient:10   Min.   : 65.00   Min.   :179.0  
##  Nutrients:30   G_08: 8   Ac_62_N      :10   1st Qu.: 87.25   1st Qu.:201.2  
##  Ramping  : 1   G_31:16   Ac_62_N_P    :10   Median :106.00   Median :220.0  
##                 G_48:28   Ac_07_Ambient: 9   Mean   : 99.90   Mean   :213.9  
##                 G_50:13   Ac_07_N      : 9   3rd Qu.:113.00   3rd Qu.:227.0  
##                 G_62:29   Ac_48_N      : 9   Max.   :113.00   Max.   :227.0  
##                           (Other)      :63                                   
##   Fu.time_tot     Fu.stat_tot      Fu.time_texp     Fu.stat_exp    
##  Min.   :179.0   Min.   :0.0000   Min.   : 65.00   Min.   :0.0000  
##  1st Qu.:201.2   1st Qu.:0.0000   1st Qu.: 87.25   1st Qu.:0.0000  
##  Median :220.0   Median :0.0000   Median :106.00   Median :0.0000  
##  Mean   :213.9   Mean   :0.4833   Mean   : 99.90   Mean   :0.4833  
##  3rd Qu.:227.0   3rd Qu.:1.0000   3rd Qu.:113.00   3rd Qu.:1.0000  
##  Max.   :227.0   Max.   :1.0000   Max.   :113.00   Max.   :1.0000  
## 
    summary(Survival.data$Genotype)
## G_07 G_08 G_31 G_48 G_50 G_62 
##   26    8   16   28   13   29
    Survival.data$Genotype<-factor(Survival.data$Genotype, 
                                   levels=c("G_48", "G_62","G_31", "G_08","G_07", "G_50"))
    #Survival.data$Treatment<-factor(Survival.data$Treatment, 
    #                               levels=c("N", "N_P","Ambient"))
    summary(Survival.data$Treatment)
## Ambient       N     N_P 
##      39      41      40

1. All data models

## Add survival object (Fit survival data using the Kaplan-Meier method)
  surv_object <- Surv(time = Survival.data$Fu.time_texp, event = Survival.data$Fu.stat_exp)
  surv_object 
##   [1] 113+ 113+ 113+  82+ 113+ 113+ 113+ 113+  82+ 113+ 113+  82+ 113+ 113+ 113+
##  [16] 113+ 113+ 113+  82+ 113+ 113+ 113+ 113+ 113+ 113+  82+ 113+ 113+ 113+ 113+
##  [31] 113+ 113+  82+ 113+ 113+ 113+ 113+ 113+  82+  89   96   92   82+  92   71 
##  [46]  96   82+  96   92   92   82+  99   82+ 103   99   82+ 103  110   82+ 110 
##  [61] 110  110   82+ 110  106  106   92   76   71   71  110  110  106  110   82+
##  [76] 103  106   82+ 110  106   99   96   96   82+  92   92   82+  96   96   99 
##  [91]  92   96   82+ 106  113+ 103  110  110  106   82+ 106   82+ 113+ 110  113+
## [106]  96   96   65   96   82+ 106  110   82+ 110  110   82+ 113+  82+ 113+ 110

Treatment only: This model pools all the genotyes and replicates per treatment

# Only treatment model

    # Kaplan-Meier estimator. The "log-log" confidence interval is preferred.
    fit1 <- survfit(surv_object ~ Treatment, data = Survival.data)
    summary(fit1)
## Call: survfit(formula = surv_object ~ Treatment, data = Survival.data)
## 
##                 Treatment=Ambient 
##      time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 
##                 Treatment=N 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    71     41       3    0.927  0.0407        0.850        1.000
##    76     38       1    0.902  0.0463        0.816        0.998
##    89     28       1    0.870  0.0548        0.769        0.984
##    92     27       5    0.709  0.0789        0.570        0.882
##    96     22       3    0.612  0.0856        0.466        0.805
##    99     19       2    0.548  0.0879        0.400        0.750
##   103     17       3    0.451  0.0884        0.307        0.662
##   106     14       5    0.290  0.0810        0.168        0.502
##   110      9       9    0.000     NaN           NA           NA
## 
##                 Treatment=N_P 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    65     40       1    0.975  0.0247       0.9278        1.000
##    92     30       3    0.877  0.0578       0.7712        0.999
##    96     27       8    0.617  0.0872       0.4682        0.814
##    99     19       2    0.552  0.0893       0.4025        0.758
##   103     17       1    0.520  0.0898       0.3707        0.729
##   106     16       4    0.390  0.0878       0.2509        0.606
##   110     12       7    0.163  0.0665       0.0729        0.362
    sd1<-survdiff(surv_object~Treatment, data = Survival.data)
    1 - pchisq(sd1$chisq, length(sd1$n) - 1)# pvalue
## [1] 3.774758e-15
    #coxfit <- coxph(surv_object ~ Treatment, data = Survival.data, ties = 'exact')
    #coxfit <- coxph(surv_object ~ Treatment, data = Survival.data, singular.ok = TRUE)
    #summary(coxfit)

    # Plot the survival model
    Ac_Treatment_Only<-ggsurvplot(fit1, data = Survival.data, pval = TRUE, 
           conf.int = T, risk.table=T, palette=Fill.colour,
           break.time.by=15, xlim=c(0,115), risk.table.y.text = FALSE,
           risk.table.title="Number of fragments at risk") + ggtitle("Treatment (all genotypes)")
    Ac_Treatment_Only

# Other plots    
    ggsurvplot(fit1, data = Survival.data, fun = "event")

    ggsurvplot(fit1, data = Survival.data, fun = "cumhaz")

    ggsurvplot(fit1, data = Survival.data, fun = "pct")

Genotype and treatment

Model

# Treatment and genotype model 1 
    # Kaplan-Meier estimator. The "log-log" confidence interval is preferred.
    fit2 <- survfit(surv_object ~ Genotype + Treatment, data = Survival.data)
    #fit2 <- survfit(surv_object ~ Treatment+ Genotype, data = Survival.data)
    summary(fit2)
## Call: survfit(formula = surv_object ~ Genotype + Treatment, data = Survival.data)
## 
##                 Genotype=G_48, Treatment=Ambient 
##      time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 
##                 Genotype=G_48, Treatment=N       
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   106      7       2    0.714   0.171        0.447            1
##   110      5       5    0.000     NaN           NA           NA
## 
##                 Genotype=G_48, Treatment=N_P     
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   106      7       2    0.714   0.171       0.4471        1.000
##   110      5       3    0.286   0.171       0.0886        0.922
## 
##                 Genotype=G_62, Treatment=Ambient 
##      time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 
##                 Genotype=G_62, Treatment=N       
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   103      8       1    0.875   0.117        0.673            1
##   106      7       3    0.500   0.177        0.250            1
##   110      4       4    0.000     NaN           NA           NA
## 
##                 Genotype=G_62, Treatment=N_P     
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   106      7       1    0.857   0.132       0.6334        1.000
##   110      6       4    0.286   0.171       0.0886        0.922
## 
##                 Genotype=G_31, Treatment=Ambient 
##      time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 
##                 Genotype=G_31, Treatment=N       
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    99      4       2      0.5    0.25        0.188            1
##   103      2       2      0.0     NaN           NA           NA
## 
##                 Genotype=G_31, Treatment=N_P     
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    96      4       1     0.75   0.217       0.4259            1
##   103      3       1     0.50   0.250       0.1877            1
##   106      2       1     0.25   0.217       0.0458            1
## 
##                 Genotype=G_08, Treatment=Ambient 
##      time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 
##                 Genotype=G_08, Treatment=N       
##         time       n.risk      n.event     survival      std.err lower 95% CI 
##           92            2            2            0          NaN           NA 
## upper 95% CI 
##           NA 
## 
##                 Genotype=G_08, Treatment=N_P     
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    92      3       1    0.667   0.272       0.2995            1
##    96      2       1    0.333   0.272       0.0673            1
##    99      1       1    0.000     NaN           NA           NA
## 
##                 Genotype=G_07, Treatment=Ambient 
##      time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 
##                 Genotype=G_07, Treatment=N       
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    71      9       1    0.889   0.105        0.706            1
##    89      6       1    0.741   0.161        0.484            1
##    92      5       2    0.444   0.189        0.193            1
##    96      3       3    0.000     NaN           NA           NA
## 
##                 Genotype=G_07, Treatment=N_P     
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    92      6       2    0.667   0.192       0.3786        1.000
##    96      4       3    0.167   0.152       0.0278        0.997
##    99      1       1    0.000     NaN           NA           NA
## 
##                 Genotype=G_50, Treatment=Ambient 
##      time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 
##                 Genotype=G_50, Treatment=N       
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    71      4       2     0.50   0.250       0.1877            1
##    76      2       1     0.25   0.217       0.0458            1
##    92      1       1     0.00     NaN           NA           NA
## 
##                 Genotype=G_50, Treatment=N_P     
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    65      5       1      0.8   0.179        0.516            1
##    96      3       3      0.0     NaN           NA           NA
    results<-summary(fit2, times = c(82, 91, 106, 110))
    save.df <- as.data.frame(results[c("strata", "time", "n.risk", "n.event", "surv", "std.err")])
    write.csv(save.df, file = "survival.csv")
    
    sd2<-survdiff(surv_object~ Genotype + Treatment, data = Survival.data)
    sd2
## Call:
## survdiff(formula = surv_object ~ Genotype + Treatment, data = Survival.data)
## 
##                                   N Observed Expected (O-E)^2/E (O-E)^2/V
## Genotype=G_48, Treatment=Ambient 10        0    6.834    6.8344    9.2424
## Genotype=G_48, Treatment=N        9        7    5.387    0.4830    0.6234
## Genotype=G_48, Treatment=N_P      9        5    5.387    0.0278    0.0359
## Genotype=G_62, Treatment=Ambient  9        0    5.991    5.9907    7.9633
## Genotype=G_62, Treatment=N       10        8    5.482    1.1569    1.4770
## Genotype=G_62, Treatment=N_P     10        5    5.731    0.0932    0.1220
## Genotype=G_31, Treatment=Ambient  5        0    3.417    3.4172    4.3250
## Genotype=G_31, Treatment=N        6        4    1.550    3.8727    4.3931
## Genotype=G_31, Treatment=N_P      5        3    2.103    0.3821    0.4496
## Genotype=G_08, Treatment=Ambient  2        0    1.688    1.6875    2.0739
## Genotype=G_08, Treatment=N        3        2    0.328    8.5054    9.1447
## Genotype=G_08, Treatment=N_P      3        3    0.758    6.6279    7.3929
## Genotype=G_07, Treatment=Ambient  9        0    5.991    5.9907    7.9633
## Genotype=G_07, Treatment=N        9        7    1.294   25.1507   28.6643
## Genotype=G_07, Treatment=N_P      8        6    1.544   12.8649   14.7952
## Genotype=G_50, Treatment=Ambient  4        0    3.375    3.3750    4.2801
## Genotype=G_50, Treatment=N        4        4    0.252   55.6404   59.0694
## Genotype=G_50, Treatment=N_P      5        4    0.887   10.9177   12.3517
## 
##  Chisq= 189  on 17 degrees of freedom, p= <2e-16
    1 - pchisq(sd2$chisq, length(sd2$n) - 1)# pvalue
## [1] 0

Cox hazards

 coxfit2 <- coxph(surv_object ~ Genotype + Treatment, data = Survival.data)
 
  # coxfit2 <- coxph(surv_object ~ Genotype + Treatment +
  #                  strata(Treatment), data = Survival.data)
    summary(coxfit2)
## Call:
## coxph(formula = surv_object ~ Genotype + Treatment, data = Survival.data)
## 
##   n= 120, number of events= 58 
## 
##                   coef exp(coef)  se(coef)     z Pr(>|z|)    
## GenotypeG_62 1.327e-01 1.142e+00 4.008e-01 0.331  0.74057    
## GenotypeG_31 1.589e+00 4.900e+00 5.205e-01 3.054  0.00226 ** 
## GenotypeG_08 5.123e+00 1.678e+02 8.900e-01 5.756 8.62e-09 ***
## GenotypeG_07 5.001e+00 1.485e+02 8.218e-01 6.085 1.16e-09 ***
## GenotypeG_50 5.990e+00 3.994e+02 9.137e-01 6.556 5.52e-11 ***
## TreatmentN   2.581e+01 1.623e+11 4.383e+03 0.006  0.99530    
## TreatmentN_P 2.460e+01 4.845e+10 4.383e+03 0.006  0.99552    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## GenotypeG_62 1.142e+00  8.757e-01    0.5206     2.505
## GenotypeG_31 4.900e+00  2.041e-01    1.7670    13.591
## GenotypeG_08 1.678e+02  5.961e-03   29.3195   959.981
## GenotypeG_07 1.485e+02  6.732e-03   29.6717   743.560
## GenotypeG_50 3.994e+02  2.504e-03   66.6410  2394.207
## TreatmentN   1.623e+11  6.162e-12    0.0000       Inf
## TreatmentN_P 4.845e+10  2.064e-11    0.0000       Inf
## 
## Concordance= 0.948  (se = 0.012 )
## Likelihood ratio test= 181.7  on 7 df,   p=<2e-16
## Wald test            = 49.33  on 7 df,   p=2e-08
## Score (logrank) test = 108.9  on 7 df,   p=<2e-16
    coxfit2
## Call:
## coxph(formula = surv_object ~ Genotype + Treatment, data = Survival.data)
## 
##                   coef exp(coef)  se(coef)     z        p
## GenotypeG_62 1.327e-01 1.142e+00 4.008e-01 0.331  0.74057
## GenotypeG_31 1.589e+00 4.900e+00 5.205e-01 3.054  0.00226
## GenotypeG_08 5.123e+00 1.678e+02 8.900e-01 5.756 8.62e-09
## GenotypeG_07 5.001e+00 1.485e+02 8.218e-01 6.085 1.16e-09
## GenotypeG_50 5.990e+00 3.994e+02 9.137e-01 6.556 5.52e-11
## TreatmentN   2.581e+01 1.623e+11 4.383e+03 0.006  0.99530
## TreatmentN_P 2.460e+01 4.845e+10 4.383e+03 0.006  0.99552
## 
## Likelihood ratio test=181.7  on 7 df, p=< 2.2e-16
## n= 120, number of events= 58
   ggadjustedcurves(coxfit2, data=Survival.data, variable = "Treatment")

  • “z” gives the Wald statistic value. It corresponds to the ratio of each regression coefficient to its standard error (z = coef/se(coef)). The wald statistic evaluates, whether the beta (β) coefficient of a given variable is statistically significantly different from 0.

  • The regression coefficients is the the sign of the regression coefficients (coef). A positive sign means that the hazard (risk of death) is higher, and thus the prognosis worse, for subjects with higher values of that variable. The variable sex is encoded as a numeric vector. 1: male, 2: female. The R summary for the Cox model gives the hazard ratio (HR) for the second group relative to the first group.

  • Hazard ratios. The exponentiated coefficients (exp(coef)), also known as hazard ratios, give the effect size of covariates. Confidence intervals of the hazard ratios. The summary output also gives upper and lower 95% confidence intervals for the hazard ratio (exp(coef))

  • Global statistical significance of the model. p-values for three alternative tests for overall significance of the model: The likelihood-ratio test, Wald test, and score logrank statistics. These three methods are asymptotically equivalent. For large enough N, they will give similar results. For small N, they may differ somewhat. The Likelihood ratio test has better behavior for small sample sizes, so it is generally preferred.

# Test for the proportional-hazards (PH) assumption
    test.ph <- cox.zph(coxfit2)
    test.ph
##             chisq df    p
## Genotype  8.20821  5 0.15
## Treatment 0.00639  2 1.00
## GLOBAL    8.39189  7 0.30
    # the test is not statistically significant for each of the covariates, or the global test.
    # Therefore, we can assume the proportional hazards.
    
    ggcoxzph(test.ph)

    # Systematic departures from a horizontal line are indicative of non-proportional hazard
    
# Testing influential observations
    
    ggcoxdiagnostics(coxfit2, type = "dfbeta",
                 linear.predictions = FALSE, ggtheme = theme_bw())

      #Positive values correspond to individuals that “died too soon” compared to expected survival times.
      #Negative values correspond to individual that “lived too long”.
      #Very large or small values are outliers, which are poorly predicted by the model.
    
    ggcoxdiagnostics(coxfit2, type = "deviance",
                 linear.predictions = FALSE, ggtheme = theme_bw())

 # Testing non linearity (for numeric variales)
    #$ggcoxfunctional(Surv(time = Survival.data$Fu.time_texp,
    #                     event = Survival.data$Fu.stat_exp) ~ Day + log(Day) + sqrt(Day),
     #               data = Survival.data)

Plor survival models

    # Plot the survival model
    GenTre_1<-ggsurvplot(fit2, data = Survival.data, pval = TRUE,
           risk.table=T,  tables.height=0.5)
    #GenTre_1
    
    Ac_facet_T<-ggsurvplot_facet(fit2, data = Survival.data, facet.by="Genotype", 
                  #risk.table=T, tables.height=0.5, 
                  conf.int = T,
                  nrow = 6, alpha=1, 
                  palette=Fill.colour, linetype=1)+
      geom_segment(aes(x = 0, y = 0, xend = 91, yend = 0), linetype="dashed", colour = "gray35")+
      geom_segment(aes(x = 79, xend = 91, y = 0, yend = 0.5), colour = "gray35", linetype="dotted")+
      geom_segment(aes(x = 91, xend = 113, y = 0.5, yend = 0.5), colour = "gray35", linetype="dotted")
    Ac_facet_T

    #ggsave("Outputs/Ac_facet_T.svg", Ac_facet_T, width=4, height=7,dpi = 300)
      
    Ac_facet_G<-ggsurvplot_facet(fit2, data = Survival.data, 
                 facet.by="Treatment", 
                 conf.int = T,
                 # risk.table=T, tables.height=0.5, 
                 nrow = 3, alpha=0.5,
                 linetype=1)+
       geom_segment(aes(x = 0, y = 0, xend = 91, yend = 0), linetype="dashed", colour = "gray35")+
       geom_segment(aes(x = 79, xend = 91, y = 0, yend = 0.5), colour = "gray35", linetype="dotted")+
       geom_segment(aes(x = 91, xend = 113, y = 0.5, yend = 0.5), colour = "gray35", linetype="dotted")
    Ac_facet_G

    #ggsave("Outputs/Ac_facet_G.svg", Ac_facet_G, width=4, height=6,dpi = 300)

2. Only nutrients (N and N+P) data and models

# Data
    Survival.data2<-Survival.data[(Survival.data$Treatment!="Ambient"),]
    summary(Survival.data2)
##     Fragment    Treatment  Replicate         Date      Time_Point
##  Ac_101 : 1   Ambient: 0   R1:42     02/05/2018:18   T14    :18  
##  Ac_103 : 1   N      :41   R2:39     03/05/2018:16   T22    :16  
##  Ac_104 : 1   N_P    :40             02/19/2018:11   T18    :11  
##  Ac_106 : 1                          03/01/2018: 9   T21    : 9  
##  Ac_107 : 1                          02/15/2018: 8   T17    : 8  
##  Ac_109 : 1                          03/08/2018: 5   T23    : 5  
##  (Other):75                          (Other)   :14   (Other):14  
##        Phase    Genotype      Gen_Treat  Days_Experiment  Days_Survivor  
##  Heat     :57   G_48:18   Ac_62_N  :10   Min.   : 65.00   Min.   :179.0  
##  Nutrients:23   G_62:20   Ac_62_N_P:10   1st Qu.: 82.00   1st Qu.:196.0  
##  Ramping  : 1   G_31:11   Ac_07_N  : 9   Median : 96.00   Median :210.0  
##                 G_08: 6   Ac_48_N  : 9   Mean   : 96.27   Mean   :210.3  
##                 G_07:17   Ac_48_N_P: 9   3rd Qu.:110.00   3rd Qu.:224.0  
##                 G_50: 9   Ac_07_N_P: 8   Max.   :113.00   Max.   :227.0  
##                           (Other)  :26                                   
##   Fu.time_tot     Fu.stat_tot     Fu.time_texp     Fu.stat_exp   
##  Min.   :179.0   Min.   :0.000   Min.   : 65.00   Min.   :0.000  
##  1st Qu.:196.0   1st Qu.:0.000   1st Qu.: 82.00   1st Qu.:0.000  
##  Median :210.0   Median :1.000   Median : 96.00   Median :1.000  
##  Mean   :210.3   Mean   :0.716   Mean   : 96.27   Mean   :0.716  
##  3rd Qu.:224.0   3rd Qu.:1.000   3rd Qu.:110.00   3rd Qu.:1.000  
##  Max.   :227.0   Max.   :1.000   Max.   :113.00   Max.   :1.000  
## 
    summary(Survival.data2$Genotype)
## G_48 G_62 G_31 G_08 G_07 G_50 
##   18   20   11    6   17    9
    Survival.data2$Genotype<-factor(Survival.data2$Genotype, 
                                   levels=c("G_48", "G_62","G_31", "G_08","G_07", "G_50"))
    summary(Survival.data2$Genotype)
## G_48 G_62 G_31 G_08 G_07 G_50 
##   18   20   11    6   17    9
    summary(Survival.data2$Treatment)
## Ambient       N     N_P 
##       0      41      40
    Survival.data2$Treatment<-factor(Survival.data2$Treatment, 
                                   levels=c("N", "N_P"))
## Add survival object (Fit survival data using the Kaplan-Meier method)
  surv_object2 <- Surv(time = Survival.data2$Fu.time_texp, 
                       event = Survival.data2$Fu.stat_exp)
  surv_object2 
##  [1]  89   96   92   82+  92   71   96   82+  96   92   92   82+  99   82+ 103 
## [16]  99   82+ 103  110   82+ 110  110  110   82+ 110  106  106   92   76   71 
## [31]  71  110  110  106  110   82+ 103  106   82+ 110  106   99   96   96   82+
## [46]  92   92   82+  96   96   99   92   96   82+ 106  113+ 103  110  110  106 
## [61]  82+ 106   82+ 113+ 110  113+  96   96   65   96   82+ 106  110   82+ 110 
## [76] 110   82+ 113+  82+ 113+ 110

Genotype model

  • Model
# Only genotype model
  
  # Kaplan-Meier estimator. The "log-log" confidence interval is preferred.
    fit3 <- survfit(surv_object2 ~ Genotype, data = Survival.data2)
    summary(fit3)    
## Call: survfit(formula = surv_object2 ~ Genotype, data = Survival.data2)
## 
##                 Genotype=G_48 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   106     14       4    0.714  0.1207       0.5129        0.995
##   110     10       8    0.143  0.0935       0.0396        0.515
## 
##                 Genotype=G_62 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   103     15       1    0.933  0.0644       0.8153        1.000
##   106     14       4    0.667  0.1217       0.4661        0.953
##   110     10       8    0.133  0.0878       0.0367        0.484
## 
##                 Genotype=G_31 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    96      8       1    0.875   0.117       0.6734        1.000
##    99      7       2    0.625   0.171       0.3654        1.000
##   103      5       3    0.250   0.153       0.0753        0.830
##   106      2       1    0.125   0.117       0.0200        0.782
## 
##                 Genotype=G_08 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    92      5       3      0.4   0.219       0.1367            1
##    96      2       1      0.2   0.179       0.0346            1
##    99      1       1      0.0     NaN           NA           NA
## 
##                 Genotype=G_07 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    71     17       1   0.9412  0.0571        0.836        1.000
##    89     12       1   0.8627  0.0915        0.701        1.000
##    92     11       4   0.5490  0.1380        0.335        0.899
##    96      7       6   0.0784  0.0752        0.012        0.514
##    99      1       1   0.0000     NaN           NA           NA
## 
##                 Genotype=G_50 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    65      9       1    0.889   0.105        0.706        1.000
##    71      8       2    0.667   0.157        0.420        1.000
##    76      6       1    0.556   0.166        0.310        0.997
##    92      4       1    0.417   0.173        0.185        0.940
##    96      3       3    0.000     NaN           NA           NA
    summary(fit3)$table
##               records n.max n.start events    *rmean *se(rmean) median 0.95LCL
## Genotype=G_48      18    18      18     12 109.28571  0.6180002    110     110
## Genotype=G_62      20    20      20     13 108.86667  0.7046933    110     106
## Genotype=G_31      11    11      11      7 102.75000  1.7207375    103      99
## Genotype=G_08       6     6       6      5  94.20000  1.2774976     92      92
## Genotype=G_07      17    17      17     13  92.96078  1.5226638     96      92
## Genotype=G_50       9     9       9      8  84.22222  4.1370138     92      71
##               0.95UCL
## Genotype=G_48      NA
## Genotype=G_62     110
## Genotype=G_31      NA
## Genotype=G_08      NA
## Genotype=G_07      NA
## Genotype=G_50      NA
    surv_pvalue(fit3)
    res.sum <- surv_summary(fit3)
    attr(res.sum, "table")
    sd3<-survdiff(surv_object2 ~ Genotype, data = Survival.data2)
    sd3
## Call:
## survdiff(formula = surv_object2 ~ Genotype, data = Survival.data2)
## 
##                N Observed Expected (O-E)^2/E (O-E)^2/V
## Genotype=G_48 18       12    21.42    4.1426   10.4702
## Genotype=G_62 20       13    22.15    3.7802    9.6026
## Genotype=G_31 11        7     6.43    0.0496    0.0737
## Genotype=G_08  6        5     1.72    6.2608    7.5942
## Genotype=G_07 17       13     4.48   16.1838   22.4284
## Genotype=G_50  9        8     1.79   21.4898   26.4752
## 
##  Chisq= 76.7  on 5 degrees of freedom, p= 4e-15
    1 - pchisq(sd3$chisq, length(sd3$n) - 1)# pvalue
## [1] 4.107825e-15
    results3<-summary(fit3, times = c(82, 91, 106, 110))
    save.df <- as.data.frame(results3[c("strata", "time", "n.risk", "n.event", "surv", "std.err")])
    write.csv(save.df, file = "survival3.csv")

Figure 2a

Plot survival model

# Plot the survival model
   Genotype_only2<-ggsurvplot(fit3, data = Survival.data2, pval = TRUE,
               risk.table=T,
               #risk.table = "abs_pct",  # absolute number and percentage at risk.
               tables.height=0.4, conf.int = T, n.risk = TRUE,
               #risk.table.y.text = FALSE, 
               #surv.median.line = "hv", # Specify median survival
               break.time.by=15, xlim=c(0,115), 
               xlab="Days in the experiment",
           #ncensor.plot = TRUE
           risk.table.title="Number of A. cervicornis at risk",
           #risk.table.y.text = FALSE,
           risk.table.y.text.col = TRUE)
  Genotype_only2

  #Genotype_only2$ncensor.plot
     
  Acer.Nut.Probabilities<-Genotype_only2$data.survplot
  Acer.Nut.Probabilities
    #ggsave("Outputs/Fig_2_Surv_Genotype.svg", 
    #       Genotype_only2$plot, width=4.5, height=3.5,dpi = 300)
    #ggsave("Outputs/Fig_2_Surv_Genotype.pdf", print(Genotype_only2),
    #       width=4.5, height=4.5,dpi = 300)

  
  # Other plots    
    ggsurvplot(fit3, data = Survival.data2, conf.int = TRUE, fun = "event")

    ggsurvplot(fit3, data = Survival.data2, conf.int = TRUE, fun = "cumhaz")

    ggsurvplot(fit3, data = Survival.data2, conf.int = TRUE, fun = "pct")

  • Cox hazards
  coxfit3 <- coxph(surv_object2 ~ Genotype, data = Survival.data2)
    summary(coxfit3)
## Call:
## coxph(formula = surv_object2 ~ Genotype, data = Survival.data2)
## 
##   n= 81, number of events= 58 
## 
##                   coef exp(coef)  se(coef)     z Pr(>|z|)    
## GenotypeG_62   0.07009   1.07260   0.40034 0.175   0.8610    
## GenotypeG_31   1.08998   2.97422   0.48474 2.249   0.0245 *  
## GenotypeG_08   4.16708  64.52701   0.81062 5.141 2.74e-07 ***
## GenotypeG_07   4.33373  76.22844   0.75356 5.751 8.87e-09 ***
## GenotypeG_50   4.91484 136.29688   0.80987 6.069 1.29e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## GenotypeG_62     1.073   0.932310    0.4894     2.351
## GenotypeG_31     2.974   0.336222    1.1502     7.691
## GenotypeG_08    64.527   0.015497   13.1746   316.043
## GenotypeG_07    76.228   0.013118   17.4053   333.850
## GenotypeG_50   136.297   0.007337   27.8689   666.579
## 
## Concordance= 0.854  (se = 0.022 )
## Likelihood ratio test= 75.2  on 5 df,   p=8e-15
## Wald test            = 42.11  on 5 df,   p=6e-08
## Score (logrank) test = 87.02  on 5 df,   p=<2e-16
    coxfit3
## Call:
## coxph(formula = surv_object2 ~ Genotype, data = Survival.data2)
## 
##                   coef exp(coef)  se(coef)     z        p
## GenotypeG_62   0.07009   1.07260   0.40034 0.175   0.8610
## GenotypeG_31   1.08998   2.97422   0.48474 2.249   0.0245
## GenotypeG_08   4.16708  64.52701   0.81062 5.141 2.74e-07
## GenotypeG_07   4.33373  76.22844   0.75356 5.751 8.87e-09
## GenotypeG_50   4.91484 136.29688   0.80987 6.069 1.29e-09
## 
## Likelihood ratio test=75.2  on 5 df, p=8.446e-15
## n= 81, number of events= 58
    ggadjustedcurves(coxfit3, data=Survival.data2, variable = "Genotype")

  coxfit3 %>% 
      gtsummary::tbl_regression(exp = TRUE) 
Characteristic HR1 95% CI1 p-value
Genotype
G_48
G_62 1.07 0.49, 2.35 0.9
G_31 2.97 1.15, 7.69 0.025
G_08 64.5 13.2, 316 <0.001
G_07 76.2 17.4, 334 <0.001
G_50 136 27.9, 667 <0.001

1 HR = Hazard Ratio, CI = Confidence Interval

# Test for the proportional-hazards (PH) assumption
    test.ph <- cox.zph(coxfit3)
    test.ph
##          chisq df     p
## Genotype  9.86  5 0.079
## GLOBAL    9.86  5 0.079
    # the test is not statistically significant for each of the covariates, or the global test.
    # Therefore, we can assume the proportional hazards.
    
    ggcoxzph(test.ph)

    # Systematic departures from a horizontal line are indicative of non-proportional hazard
    
# Testing influential observations
    
    ggcoxdiagnostics(coxfit3, type = "dfbeta",
                 linear.predictions = FALSE, ggtheme = theme_bw())

      #Positive values correspond to individuals that “died too soon” compared to expected survival times.
      #Negative values correspond to individual that “lived too long”.
      #Very large or small values are outliers, which are poorly predicted by the model.
    
    ggcoxdiagnostics(coxfit3, type = "deviance",
                 linear.predictions = FALSE, ggtheme = theme_bw())

 # Testing non linearity (for numeric variales)
    #$ggcoxfunctional(Surv(time = Survival.data$Fu.time_texp,
    #                     event = Survival.data$Fu.stat_exp) ~ Day + log(Day) + sqrt(Day),
     #               data = Survival.data)

Figure 2b

Plot Hazard ratio

HazardRatio3<-ggforest(coxfit3, data = Survival.data2)
HazardRatio3

#ggsave("Outputs/Fig_2b_HazardRatio.svg", HazardRatio3, width=5, height=4,dpi = 300)

Treatment model

Non-significant

# Only treatment model
    # Kaplan-Meier estimator. The "log-log" confidence interval is preferred.
    fit4 <- survfit(surv_object2 ~ Treatment, data = Survival.data2)
    summary(fit4)
## Call: survfit(formula = surv_object2 ~ Treatment, data = Survival.data2)
## 
##                 Treatment=N 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    71     41       3    0.927  0.0407        0.850        1.000
##    76     38       1    0.902  0.0463        0.816        0.998
##    89     28       1    0.870  0.0548        0.769        0.984
##    92     27       5    0.709  0.0789        0.570        0.882
##    96     22       3    0.612  0.0856        0.466        0.805
##    99     19       2    0.548  0.0879        0.400        0.750
##   103     17       3    0.451  0.0884        0.307        0.662
##   106     14       5    0.290  0.0810        0.168        0.502
##   110      9       9    0.000     NaN           NA           NA
## 
##                 Treatment=N_P 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    65     40       1    0.975  0.0247       0.9278        1.000
##    92     30       3    0.877  0.0578       0.7712        0.999
##    96     27       8    0.617  0.0872       0.4682        0.814
##    99     19       2    0.552  0.0893       0.4025        0.758
##   103     17       1    0.520  0.0898       0.3707        0.729
##   106     16       4    0.390  0.0878       0.2509        0.606
##   110     12       7    0.163  0.0665       0.0729        0.362
    surv_pvalue(fit4)
    survdiff(surv_object2~Treatment, data = Survival.data2)
## Call:
## survdiff(formula = surv_object2 ~ Treatment, data = Survival.data2)
## 
##                N Observed Expected (O-E)^2/E (O-E)^2/V
## Treatment=N   41       32     26.8     1.007      2.75
## Treatment=N_P 40       26     31.2     0.865      2.75
## 
##  Chisq= 2.8  on 1 degrees of freedom, p= 0.1
    coxfit4 <- coxph(surv_object2 ~ Treatment, data = Survival.data2)
    summary(coxfit4)
## Call:
## coxph(formula = surv_object2 ~ Treatment, data = Survival.data2)
## 
##   n= 81, number of events= 58 
## 
##                 coef exp(coef) se(coef)      z Pr(>|z|)  
## TreatmentN_P -0.4973    0.6082   0.2691 -1.848   0.0646 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## TreatmentN_P    0.6082      1.644    0.3589     1.031
## 
## Concordance= 0.562  (se = 0.041 )
## Likelihood ratio test= 3.45  on 1 df,   p=0.06
## Wald test            = 3.41  on 1 df,   p=0.06
## Score (logrank) test = 3.48  on 1 df,   p=0.06
    # Plot the survival model
    Treatment_Only<-ggsurvplot(fit4, data = Survival.data2, pval = TRUE, 
           conf.int = T, risk.table=T, palette=Fill.colour)
    Treatment_Only

    # Plot the Hazard ratio
    fit.coxph4 <- coxph(surv_object2 ~ Treatment, data = Survival.data2)
    ggforest(fit.coxph4, data = Survival.data2)

    fit.coxph4
## Call:
## coxph(formula = surv_object2 ~ Treatment, data = Survival.data2)
## 
##                 coef exp(coef) se(coef)      z      p
## TreatmentN_P -0.4973    0.6082   0.2691 -1.848 0.0646
## 
## Likelihood ratio test=3.45  on 1 df, p=0.06338
## n= 81, number of events= 58

Treatment and genotype model

  • Model
# Treatment and genotype model 1 
    # Kaplan-Meier estimator. The "log-log" confidence interval is preferred.
    fit5 <- survfit(surv_object2 ~ Treatment + Genotype, data = Survival.data2)
    summary(fit5)
## Call: survfit(formula = surv_object2 ~ Treatment + Genotype, data = Survival.data2)
## 
##                 Treatment=N, Genotype=G_48 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   106      7       2    0.714   0.171        0.447            1
##   110      5       5    0.000     NaN           NA           NA
## 
##                 Treatment=N, Genotype=G_62 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   103      8       1    0.875   0.117        0.673            1
##   106      7       3    0.500   0.177        0.250            1
##   110      4       4    0.000     NaN           NA           NA
## 
##                 Treatment=N, Genotype=G_31 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    99      4       2      0.5    0.25        0.188            1
##   103      2       2      0.0     NaN           NA           NA
## 
##                 Treatment=N, Genotype=G_08 
##         time       n.risk      n.event     survival      std.err lower 95% CI 
##           92            2            2            0          NaN           NA 
## upper 95% CI 
##           NA 
## 
##                 Treatment=N, Genotype=G_07 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    71      9       1    0.889   0.105        0.706            1
##    89      6       1    0.741   0.161        0.484            1
##    92      5       2    0.444   0.189        0.193            1
##    96      3       3    0.000     NaN           NA           NA
## 
##                 Treatment=N, Genotype=G_50 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    71      4       2     0.50   0.250       0.1877            1
##    76      2       1     0.25   0.217       0.0458            1
##    92      1       1     0.00     NaN           NA           NA
## 
##                 Treatment=N_P, Genotype=G_48 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   106      7       2    0.714   0.171       0.4471        1.000
##   110      5       3    0.286   0.171       0.0886        0.922
## 
##                 Treatment=N_P, Genotype=G_62 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   106      7       1    0.857   0.132       0.6334        1.000
##   110      6       4    0.286   0.171       0.0886        0.922
## 
##                 Treatment=N_P, Genotype=G_31 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    96      4       1     0.75   0.217       0.4259            1
##   103      3       1     0.50   0.250       0.1877            1
##   106      2       1     0.25   0.217       0.0458            1
## 
##                 Treatment=N_P, Genotype=G_08 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    92      3       1    0.667   0.272       0.2995            1
##    96      2       1    0.333   0.272       0.0673            1
##    99      1       1    0.000     NaN           NA           NA
## 
##                 Treatment=N_P, Genotype=G_07 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    92      6       2    0.667   0.192       0.3786        1.000
##    96      4       3    0.167   0.152       0.0278        0.997
##    99      1       1    0.000     NaN           NA           NA
## 
##                 Treatment=N_P, Genotype=G_50 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    65      5       1      0.8   0.179        0.516            1
##    96      3       3      0.0     NaN           NA           NA
    surv_pvalue(fit5)
  • Cox hazards
coxfit5 <- coxph(surv_object2 ~ Genotype + Treatment, data = Survival.data2)
    summary(coxfit5)
## Call:
## coxph(formula = surv_object2 ~ Genotype + Treatment, data = Survival.data2)
## 
##   n= 81, number of events= 58 
## 
##                  coef exp(coef) se(coef)      z Pr(>|z|)    
## GenotypeG_62   0.1327    1.1419   0.4008  0.331  0.74057    
## GenotypeG_31   1.5893    4.9004   0.5205  3.054  0.00226 ** 
## GenotypeG_08   5.1226  167.7681   0.8900  5.756 8.62e-09 ***
## GenotypeG_07   5.0008  148.5352   0.8218  6.085 1.16e-09 ***
## GenotypeG_50   5.9901  399.4400   0.9137  6.556 5.52e-11 ***
## TreatmentN_P  -1.2087    0.2986   0.3086 -3.916 9.00e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## GenotypeG_62    1.1419   0.875728    0.5206    2.5048
## GenotypeG_31    4.9004   0.204064    1.7670   13.5907
## GenotypeG_08  167.7681   0.005961   29.3195  959.9812
## GenotypeG_07  148.5352   0.006732   29.6717  743.5602
## GenotypeG_50  399.4400   0.002504   66.6410 2394.2073
## TreatmentN_P    0.2986   3.349201    0.1631    0.5467
## 
## Concordance= 0.893  (se = 0.02 )
## Likelihood ratio test= 91.35  on 6 df,   p=<2e-16
## Wald test            = 49.33  on 6 df,   p=6e-09
## Score (logrank) test = 96.48  on 6 df,   p=<2e-16
    coxfit5
## Call:
## coxph(formula = surv_object2 ~ Genotype + Treatment, data = Survival.data2)
## 
##                  coef exp(coef) se(coef)      z        p
## GenotypeG_62   0.1327    1.1419   0.4008  0.331  0.74057
## GenotypeG_31   1.5893    4.9004   0.5205  3.054  0.00226
## GenotypeG_08   5.1226  167.7681   0.8900  5.756 8.62e-09
## GenotypeG_07   5.0008  148.5352   0.8218  6.085 1.16e-09
## GenotypeG_50   5.9901  399.4400   0.9137  6.556 5.52e-11
## TreatmentN_P  -1.2087    0.2986   0.3086 -3.916 9.00e-05
## 
## Likelihood ratio test=91.35  on 6 df, p=< 2.2e-16
## n= 81, number of events= 58
# Test for the proportional-hazards (PH) assumption
    test.ph <- cox.zph(coxfit5)
    test.ph
##             chisq df    p
## Genotype  8.20112  5 0.15
## Treatment 0.00635  1 0.94
## GLOBAL    8.38567  6 0.21
    # the test is not statistically significant for each of the covariates, or the global test.
    # Therefore, we can assume the proportional hazards.
    
    ggcoxzph(test.ph)

    # Systematic departures from a horizontal line are indicative of non-proportional hazard
    
# Testing influential observations
    
    ggcoxdiagnostics(coxfit5, type = "dfbeta",
                 linear.predictions = FALSE, ggtheme = theme_bw())

      #Positive values correspond to individuals that “died too soon” compared to expected survival times.
      #Negative values correspond to individual that “lived too long”.
      #Very large or small values are outliers, which are poorly predicted by the model.
    
    ggcoxdiagnostics(coxfit2, type = "deviance",
                 linear.predictions = FALSE, ggtheme = theme_bw())

 # Testing non linearity (for numeric variales)
    #$ggcoxfunctional(Surv(time = Survival.data$Fu.time_texp,
    #                     event = Survival.data$Fu.stat_exp) ~ Day + log(Day) + sqrt(Day),
     #               data = Survival.data)

Plot the model

  # Plot the survival model
  GenTre_5<-ggsurvplot(fit5, data = Survival.data2, pval = TRUE, conf.int = T,
           risk.table=T,  tables.height=0.5)
  #GenTre_5
    
  ggsurvplot_facet(fit5, data = Survival.data2, 
                 facet.by="Genotype", risk.table=F, conf.int = T,
                 tables.height=0.5, nrow = 6, alpha=1,
                 palette=Fill.colour, linetype=1)+
      annotate("segment", x = 2, xend = 91, y = -1.5, yend = -1.5,
                colour = "gray35", linetype="dashed")+
      annotate("segment", x = 79, xend = 91, y = -1.4, yend = 4.5,
                colour = "gray35", linetype="dotted")+
      annotate("segment", x = 91, xend = 110, y = 4.5, yend = 4.5,
                colour = "gray35",  linetype="dotted")

  ggsurvplot_facet(fit5, data = Survival.data2, 
                 facet.by="Treatment", risk.table=T, conf.int = T,
                 tables.height=0.5, nrow = 3, alpha=0.5,
                 linetype=1)

Plot the hazard ratios

ggforest(coxfit5, data = Survival.data2)