library(survival)
library(ranger)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggfortify)
library(readr)
Tree_Data <- read_csv("C:/Users/USER/Desktop/Tree_Data.csv")
## Rows: 2783 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): Subplot, Species, Light_Cat, Soil, Adult, Sterile, Conspecific, My...
## dbl (12): No, Plot, Light_ISF, Core, AMF, EMF, Phenolics, Lignin, NSC, Censu...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

EXPLORE

str(Tree_Data)
## spc_tbl_ [2,783 × 24] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ No         : num [1:2783] 126 11 12 2823 5679 ...
##  $ Plot       : num [1:2783] 1 1 1 7 14 1 1 1 1 1 ...
##  $ Subplot    : chr [1:2783] "C" "C" "C" "D" ...
##  $ Species    : chr [1:2783] "Acer saccharum" "Quercus alba" "Quercus rubra" "Acer saccharum" ...
##  $ Light_ISF  : num [1:2783] 0.106 0.106 0.106 0.08 0.06 0.106 0.108 0.108 0.108 0.108 ...
##  $ Light_Cat  : chr [1:2783] "Med" "Med" "Med" "Med" ...
##  $ Core       : num [1:2783] 2017 2017 2017 2016 2017 ...
##  $ Soil       : chr [1:2783] "Prunus serotina" "Quercus rubra" "Prunus serotina" "Prunus serotina" ...
##  $ Adult      : chr [1:2783] "I" "970" "J" "J" ...
##  $ Sterile    : chr [1:2783] "Non-Sterile" "Non-Sterile" "Non-Sterile" "Non-Sterile" ...
##  $ Conspecific: chr [1:2783] "Heterospecific" "Heterospecific" "Heterospecific" "Heterospecific" ...
##  $ Myco       : chr [1:2783] "AMF" "EMF" "EMF" "AMF" ...
##  $ SoilMyco   : chr [1:2783] "AMF" "EMF" "AMF" "AMF" ...
##  $ PlantDate  : chr [1:2783] "06/11/2018" "5/25/18" "5/31/18" "06/11/2018" ...
##  $ AMF        : num [1:2783] 22 15.8 24.4 22.2 21.1 ...
##  $ EMF        : num [1:2783] NA 31.1 28.2 NA NA ...
##  $ Phenolics  : num [1:2783] -0.56 5.19 3.36 -0.71 -0.58 0.3 5.11 3.43 3.83 -0.05 ...
##  $ Lignin     : num [1:2783] 13.9 20.5 24.7 14.3 10.8 ...
##  $ NSC        : num [1:2783] 12.2 19.3 15 12.4 11.2 ...
##  $ Census     : num [1:2783] 4 33 18 4 4 7 7 7 33 7 ...
##  $ Time       : num [1:2783] 14 116 63 14 14 ...
##  $ Event      : num [1:2783] 1 0 1 1 1 1 0 0 0 1 ...
##  $ Harvest    : chr [1:2783] NA NA NA NA ...
##  $ Alive      : chr [1:2783] NA "X" NA NA ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   No = col_double(),
##   ..   Plot = col_double(),
##   ..   Subplot = col_character(),
##   ..   Species = col_character(),
##   ..   Light_ISF = col_double(),
##   ..   Light_Cat = col_character(),
##   ..   Core = col_double(),
##   ..   Soil = col_character(),
##   ..   Adult = col_character(),
##   ..   Sterile = col_character(),
##   ..   Conspecific = col_character(),
##   ..   Myco = col_character(),
##   ..   SoilMyco = col_character(),
##   ..   PlantDate = col_character(),
##   ..   AMF = col_double(),
##   ..   EMF = col_double(),
##   ..   Phenolics = col_double(),
##   ..   Lignin = col_double(),
##   ..   NSC = col_double(),
##   ..   Census = col_double(),
##   ..   Time = col_double(),
##   ..   Event = col_double(),
##   ..   Harvest = col_character(),
##   ..   Alive = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
summary(Tree_Data)
##        No            Plot          Subplot            Species         
##  Min.   :   3   Min.   : 1.000   Length:2783        Length:2783       
##  1st Qu.:1971   1st Qu.: 5.000   Class :character   Class :character  
##  Median :3932   Median :10.000   Mode  :character   Mode  :character  
##  Mean   :3915   Mean   : 9.562                                        
##  3rd Qu.:5879   3rd Qu.:14.000                                        
##  Max.   :7772   Max.   :18.000                                        
##                                                                       
##    Light_ISF        Light_Cat              Core          Soil          
##  Min.   :0.03200   Length:2783        Min.   :2016   Length:2783       
##  1st Qu.:0.06600   Class :character   1st Qu.:2016   Class :character  
##  Median :0.08200   Mode  :character   Median :2017   Mode  :character  
##  Mean   :0.08571                      Mean   :2017                     
##  3rd Qu.:0.10000                      3rd Qu.:2017                     
##  Max.   :0.16100                      Max.   :2017                     
##                                                                        
##     Adult             Sterile          Conspecific            Myco          
##  Length:2783        Length:2783        Length:2783        Length:2783       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    SoilMyco          PlantDate              AMF              EMF       
##  Length:2783        Length:2783        Min.   :  0.00   Min.   : 0.00  
##  Class :character   Class :character   1st Qu.: 13.40   1st Qu.:13.78  
##  Mode  :character   Mode  :character   Median : 18.00   Median :27.72  
##                                        Mean   : 20.55   Mean   :26.48  
##                                        3rd Qu.: 24.45   3rd Qu.:35.71  
##                                        Max.   :100.00   Max.   :87.50  
##                                                         NA's   :1500   
##    Phenolics          Lignin           NSC            Census     
##  Min.   :-1.350   Min.   : 2.23   Min.   : 4.30   Min.   : 4.00  
##  1st Qu.: 0.170   1st Qu.:10.36   1st Qu.:11.61   1st Qu.: 7.00  
##  Median : 0.750   Median :14.04   Median :12.66   Median :13.00  
##  Mean   : 1.933   Mean   :15.76   Mean   :14.22   Mean   :15.28  
##  3rd Qu.: 3.780   3rd Qu.:21.11   3rd Qu.:17.27   3rd Qu.:18.00  
##  Max.   : 6.100   Max.   :32.77   Max.   :29.45   Max.   :33.00  
##                                                                  
##       Time            Event          Harvest             Alive          
##  Min.   : 14.00   Min.   :0.0000   Length:2783        Length:2783       
##  1st Qu.: 24.50   1st Qu.:0.0000   Class :character   Class :character  
##  Median : 45.50   Median :1.0000   Mode  :character   Mode  :character  
##  Mean   : 53.49   Mean   :0.5705                                        
##  3rd Qu.: 63.00   3rd Qu.:1.0000                                        
##  Max.   :115.50   Max.   :1.0000                                        
##                   NA's   :1

STANDARDISE VARIABLE NAMES

names(Tree_Data)<-str_to_title(names(Tree_Data))
names(Tree_Data)
##  [1] "No"          "Plot"        "Subplot"     "Species"     "Light_isf"  
##  [6] "Light_cat"   "Core"        "Soil"        "Adult"       "Sterile"    
## [11] "Conspecific" "Myco"        "Soilmyco"    "Plantdate"   "Amf"        
## [16] "Emf"         "Phenolics"   "Lignin"      "Nsc"         "Census"     
## [21] "Time"        "Event"       "Harvest"     "Alive"

IMPUTATION

Tree_Data$Event[is.na(Tree_Data$Event)]<-0
Tree_Data$Emf<-ifelse(is.na(Tree_Data$Emf),mean(Tree_Data$Emf,na.rm=T),Tree_Data$Emf)

NUMERIC TO STRING AND VICE VERSE

Tree_Data$Sterile<-as.factor(Tree_Data$Sterile)
Tree_Data$Myco<-as.factor(Tree_Data$Myco)
Tree_Data$Species<-as.factor(Tree_Data$Species)
Tree_Data$Subplot<-as.factor(Tree_Data$Subplot)
Tree_Data$Plot<-as.factor(Tree_Data$Plot)
Tree_Data$Conspecific<-as.factor(Tree_Data$Conspecific)
Tree_Data$Soil<-as.factor(Tree_Data$Soil)
Tree_Data$Plantdate<-mdy(Tree_Data$Plantdate)
#Tree_Data$Event<-as.factor(Tree_Data$Event)
Tree_Data$Census<-as.factor(Tree_Data$Census)
Tree_Data$Core<-as.factor(Tree_Data$Core)
Tree_Data$Adult<-as.factor(Tree_Data$Adult)
str(Tree_Data)
## spc_tbl_ [2,783 × 24] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ No         : num [1:2783] 126 11 12 2823 5679 ...
##  $ Plot       : Factor w/ 18 levels "1","2","3","4",..: 1 1 1 7 14 1 1 1 1 1 ...
##  $ Subplot    : Factor w/ 5 levels "A","B","C","D",..: 3 3 3 4 1 3 1 1 1 1 ...
##  $ Species    : Factor w/ 4 levels "Acer saccharum",..: 1 3 4 1 1 2 3 4 4 1 ...
##  $ Light_isf  : num [1:2783] 0.106 0.106 0.106 0.08 0.06 0.106 0.108 0.108 0.108 0.108 ...
##  $ Light_cat  : chr [1:2783] "Med" "Med" "Med" "Med" ...
##  $ Core       : Factor w/ 2 levels "2016","2017": 2 2 2 1 2 1 1 2 1 1 ...
##  $ Soil       : Factor w/ 7 levels "Acer rubrum",..: 4 6 4 4 4 1 3 7 2 3 ...
##  $ Adult      : Factor w/ 36 levels "1027","118","1201",..: 35 31 36 36 27 10 28 16 7 23 ...
##  $ Sterile    : Factor w/ 2 levels "Non-Sterile",..: 1 1 1 1 1 1 1 2 1 1 ...
##  $ Conspecific: Factor w/ 3 levels "Conspecific",..: 2 2 2 2 2 2 2 3 2 2 ...
##  $ Myco       : Factor w/ 2 levels "AMF","EMF": 1 2 2 1 1 1 2 2 2 1 ...
##  $ Soilmyco   : chr [1:2783] "AMF" "EMF" "AMF" "AMF" ...
##  $ Plantdate  : Date[1:2783], format: "2018-06-11" "2018-05-25" ...
##  $ Amf        : num [1:2783] 22 15.8 24.4 22.2 21.1 ...
##  $ Emf        : num [1:2783] 26.5 31.1 28.2 26.5 26.5 ...
##  $ Phenolics  : num [1:2783] -0.56 5.19 3.36 -0.71 -0.58 0.3 5.11 3.43 3.83 -0.05 ...
##  $ Lignin     : num [1:2783] 13.9 20.5 24.7 14.3 10.8 ...
##  $ Nsc        : num [1:2783] 12.2 19.3 15 12.4 11.2 ...
##  $ Census     : Factor w/ 22 levels "4","7","8","9",..: 1 22 13 1 1 2 2 2 22 2 ...
##  $ Time       : num [1:2783] 14 116 63 14 14 ...
##  $ Event      : num [1:2783] 1 0 1 1 1 1 0 0 0 1 ...
##  $ Harvest    : chr [1:2783] NA NA NA NA ...
##  $ Alive      : chr [1:2783] NA "X" NA NA ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   No = col_double(),
##   ..   Plot = col_double(),
##   ..   Subplot = col_character(),
##   ..   Species = col_character(),
##   ..   Light_ISF = col_double(),
##   ..   Light_Cat = col_character(),
##   ..   Core = col_double(),
##   ..   Soil = col_character(),
##   ..   Adult = col_character(),
##   ..   Sterile = col_character(),
##   ..   Conspecific = col_character(),
##   ..   Myco = col_character(),
##   ..   SoilMyco = col_character(),
##   ..   PlantDate = col_character(),
##   ..   AMF = col_double(),
##   ..   EMF = col_double(),
##   ..   Phenolics = col_double(),
##   ..   Lignin = col_double(),
##   ..   NSC = col_double(),
##   ..   Census = col_double(),
##   ..   Time = col_double(),
##   ..   Event = col_double(),
##   ..   Harvest = col_character(),
##   ..   Alive = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
summary(Tree_Data)
##        No            Plot      Subplot            Species      Light_isf      
##  Min.   :   3   11     : 167   A:701   Acer saccharum :751   Min.   :0.03200  
##  1st Qu.:1971   18     : 166   B:663   Prunus serotina:749   1st Qu.:0.06600  
##  Median :3932   10     : 160   C:646   Quercus alba   :673   Median :0.08200  
##  Mean   :3915   7      : 159   D:666   Quercus rubra  :610   Mean   :0.08571  
##  3rd Qu.:5879   12     : 159   E:107                         3rd Qu.:0.10000  
##  Max.   :7772   15     : 159                                 Max.   :0.16100  
##                 (Other):1813                                                  
##   Light_cat           Core                         Soil         Adult     
##  Length:2783        2016: 977   Acer rubrum          :376   984    :  90  
##  Class :character   2017:1806   Acer saccharum       :397   I      :  90  
##  Mode  :character               Populus grandidentata:391   689    :  89  
##                                 Prunus serotina      :413   J      :  89  
##                                 Quercus alba         :381   275    :  88  
##                                 Quercus rubra        :402   921    :  88  
##                                 Sterile              :423   (Other):2249  
##         Sterile             Conspecific    Myco        Soilmyco        
##  Non-Sterile:2360   Conspecific   : 386   AMF:1500   Length:2783       
##  Sterile    : 423   Heterospecific:1974   EMF:1283   Class :character  
##                     Sterilized    : 423              Mode  :character  
##                                                                        
##                                                                        
##                                                                        
##                                                                        
##    Plantdate               Amf              Emf          Phenolics     
##  Min.   :2018-05-10   Min.   :  0.00   Min.   : 0.00   Min.   :-1.350  
##  1st Qu.:2018-05-25   1st Qu.: 13.40   1st Qu.:26.48   1st Qu.: 0.170  
##  Median :2018-06-01   Median : 18.00   Median :26.48   Median : 0.750  
##  Mean   :2018-05-31   Mean   : 20.55   Mean   :26.48   Mean   : 1.933  
##  3rd Qu.:2018-06-06   3rd Qu.: 24.45   3rd Qu.:26.48   3rd Qu.: 3.780  
##  Max.   :2018-06-15   Max.   :100.00   Max.   :87.50   Max.   : 6.100  
##                                                                        
##      Lignin           Nsc            Census         Time       
##  Min.   : 2.23   Min.   : 4.30   7      :907   Min.   : 14.00  
##  1st Qu.:10.36   1st Qu.:11.61   33     :493   1st Qu.: 24.50  
##  Median :14.04   Median :12.66   12     :267   Median : 45.50  
##  Mean   :15.76   Mean   :14.22   13     :257   Mean   : 53.49  
##  3rd Qu.:21.11   3rd Qu.:17.27   14     :132   3rd Qu.: 63.00  
##  Max.   :32.77   Max.   :29.45   16     :126   Max.   :115.50  
##                                  (Other):601                   
##      Event          Harvest             Alive          
##  Min.   :0.0000   Length:2783        Length:2783       
##  1st Qu.:0.0000   Class :character   Class :character  
##  Median :1.0000   Mode  :character   Mode  :character  
##  Mean   :0.5702                                        
##  3rd Qu.:1.0000                                        
##  Max.   :1.0000                                        
## 
head(Tree_Data$Plantdate)
## [1] "2018-06-11" "2018-05-25" "2018-05-31" "2018-06-11" "2018-06-11"
## [6] "2018-06-05"

MUTATE

Add Month and Day column

Tree_Data<-Tree_Data%>%mutate(Month=month(Plantdate,label=T),Day=wday(Plantdate,label=T))
names(Tree_Data)
##  [1] "No"          "Plot"        "Subplot"     "Species"     "Light_isf"  
##  [6] "Light_cat"   "Core"        "Soil"        "Adult"       "Sterile"    
## [11] "Conspecific" "Myco"        "Soilmyco"    "Plantdate"   "Amf"        
## [16] "Emf"         "Phenolics"   "Lignin"      "Nsc"         "Census"     
## [21] "Time"        "Event"       "Harvest"     "Alive"       "Month"      
## [26] "Day"

survival analysis

km <- with(Tree_Data, Surv(Time,Event))
head(km,80)
##  [1]  14.0  115.5+  63.0   14.0   14.0   24.5   24.5+  24.5+ 115.5+  24.5 
## [11] 115.5+  73.5   73.5   59.5   24.5+ 115.5+  24.5+  24.5+  24.5+  24.5 
## [21]  24.5   24.5  115.5+  24.5+  73.5   73.5   24.5+  59.5   24.5+  24.5+
## [31] 115.5+ 115.5+ 115.5+ 115.5+  24.5+  24.5   66.5   24.5+  24.5+  24.5 
## [41]  24.5   24.5+ 115.5+  24.5+  24.5+  24.5+  24.5   24.5   24.5+  70.0 
## [51]  24.5+  63.0   24.5+  24.5+ 115.5+  24.5+  24.5+  24.5+  24.5   24.5 
## [61]  24.5+  24.5+  24.5+  24.5+  24.5+  24.5+  24.5+  24.5+  24.5   24.5+
## [71] 115.5+  24.5   24.5+ 115.5+  24.5+  24.5   24.5+  24.5   59.5   24.5+
km_fit <- survfit(Surv(Time, Event) ~ 1, data=Tree_Data)
summary(km_fit, times = c(1,30,60,90*(1:10)))
## Call: survfit(formula = Surv(Time, Event) ~ 1, data = Tree_Data)
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     1   2783       0    1.000 0.00000        1.000        1.000
##    30   1815     264    0.905 0.00560        0.894        0.916
##    60    746    1069    0.372 0.01070        0.351        0.393
##    90    494     252    0.246 0.00957        0.228        0.266
autoplot(km_fit)

km_trt_fit <- survfit(Surv(Time, Event) ~Sterile, data=Tree_Data)
autoplot(km_trt_fit)

km_AG_fit <- survfit(Surv(Time, Event) ~Myco, data=Tree_Data)
autoplot(km_AG_fit)

km_AG_fit <- survfit(Surv(Time, Event) ~Soil, data=Tree_Data)
autoplot(km_AG_fit)

cox proportional hazard model

cox <- coxph(Surv(Time, Event) ~ Phenolics+Soil+Myco,data = Tree_Data)
summary(cox)
## Call:
## coxph(formula = Surv(Time, Event) ~ Phenolics + Soil + Myco, 
##     data = Tree_Data)
## 
##   n= 2783, number of events= 1587 
## 
##                               coef exp(coef) se(coef)      z Pr(>|z|)    
## Phenolics                 -0.13706   0.87192  0.05470 -2.506 0.012227 *  
## SoilAcer saccharum         0.20774   1.23089  0.09667  2.149 0.031633 *  
## SoilPopulus grandidentata  0.22829   1.25645  0.09504  2.402 0.016307 *  
## SoilPrunus serotina        0.36600   1.44195  0.09527  3.842 0.000122 ***
## SoilQuercus alba           0.41148   1.50905  0.09516  4.324 1.53e-05 ***
## SoilQuercus rubra          0.30358   1.35469  0.09617  3.157 0.001596 ** 
## SoilSterile               -0.36928   0.69123  0.09985 -3.698 0.000217 ***
## MycoEMF                   -1.89963   0.14962  0.21596 -8.796  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                           exp(coef) exp(-coef) lower .95 upper .95
## Phenolics                    0.8719     1.1469   0.78327    0.9706
## SoilAcer saccharum           1.2309     0.8124   1.01844    1.4877
## SoilPopulus grandidentata    1.2565     0.7959   1.04290    1.5137
## SoilPrunus serotina          1.4420     0.6935   1.19635    1.7380
## SoilQuercus alba             1.5091     0.6627   1.25229    1.8185
## SoilQuercus rubra            1.3547     0.7382   1.12197    1.6357
## SoilSterile                  0.6912     1.4467   0.56837    0.8407
## MycoEMF                      0.1496     6.6834   0.09799    0.2285
## 
## Concordance= 0.764  (se = 0.006 )
## Likelihood ratio test= 1567  on 8 df,   p=<2e-16
## Wald test            = 1172  on 8 df,   p=<2e-16
## Score (logrank) test = 1508  on 8 df,   p=<2e-16

Fit Cox Model

#cox <- coxph(Surv(time, status) ~ trt + celltype + karno                   + diagtime + age + prior , data = vet)
#summary(cox)