library(survival)
library(ranger)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggfortify)
library(readr)
Tree_Data <- read_csv("C:/Users/USER/Desktop/Tree_Data.csv")
## Rows: 2783 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): Subplot, Species, Light_Cat, Soil, Adult, Sterile, Conspecific, My...
## dbl (12): No, Plot, Light_ISF, Core, AMF, EMF, Phenolics, Lignin, NSC, Censu...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
EXPLORE
str(Tree_Data)
## spc_tbl_ [2,783 × 24] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ No : num [1:2783] 126 11 12 2823 5679 ...
## $ Plot : num [1:2783] 1 1 1 7 14 1 1 1 1 1 ...
## $ Subplot : chr [1:2783] "C" "C" "C" "D" ...
## $ Species : chr [1:2783] "Acer saccharum" "Quercus alba" "Quercus rubra" "Acer saccharum" ...
## $ Light_ISF : num [1:2783] 0.106 0.106 0.106 0.08 0.06 0.106 0.108 0.108 0.108 0.108 ...
## $ Light_Cat : chr [1:2783] "Med" "Med" "Med" "Med" ...
## $ Core : num [1:2783] 2017 2017 2017 2016 2017 ...
## $ Soil : chr [1:2783] "Prunus serotina" "Quercus rubra" "Prunus serotina" "Prunus serotina" ...
## $ Adult : chr [1:2783] "I" "970" "J" "J" ...
## $ Sterile : chr [1:2783] "Non-Sterile" "Non-Sterile" "Non-Sterile" "Non-Sterile" ...
## $ Conspecific: chr [1:2783] "Heterospecific" "Heterospecific" "Heterospecific" "Heterospecific" ...
## $ Myco : chr [1:2783] "AMF" "EMF" "EMF" "AMF" ...
## $ SoilMyco : chr [1:2783] "AMF" "EMF" "AMF" "AMF" ...
## $ PlantDate : chr [1:2783] "06/11/2018" "5/25/18" "5/31/18" "06/11/2018" ...
## $ AMF : num [1:2783] 22 15.8 24.4 22.2 21.1 ...
## $ EMF : num [1:2783] NA 31.1 28.2 NA NA ...
## $ Phenolics : num [1:2783] -0.56 5.19 3.36 -0.71 -0.58 0.3 5.11 3.43 3.83 -0.05 ...
## $ Lignin : num [1:2783] 13.9 20.5 24.7 14.3 10.8 ...
## $ NSC : num [1:2783] 12.2 19.3 15 12.4 11.2 ...
## $ Census : num [1:2783] 4 33 18 4 4 7 7 7 33 7 ...
## $ Time : num [1:2783] 14 116 63 14 14 ...
## $ Event : num [1:2783] 1 0 1 1 1 1 0 0 0 1 ...
## $ Harvest : chr [1:2783] NA NA NA NA ...
## $ Alive : chr [1:2783] NA "X" NA NA ...
## - attr(*, "spec")=
## .. cols(
## .. No = col_double(),
## .. Plot = col_double(),
## .. Subplot = col_character(),
## .. Species = col_character(),
## .. Light_ISF = col_double(),
## .. Light_Cat = col_character(),
## .. Core = col_double(),
## .. Soil = col_character(),
## .. Adult = col_character(),
## .. Sterile = col_character(),
## .. Conspecific = col_character(),
## .. Myco = col_character(),
## .. SoilMyco = col_character(),
## .. PlantDate = col_character(),
## .. AMF = col_double(),
## .. EMF = col_double(),
## .. Phenolics = col_double(),
## .. Lignin = col_double(),
## .. NSC = col_double(),
## .. Census = col_double(),
## .. Time = col_double(),
## .. Event = col_double(),
## .. Harvest = col_character(),
## .. Alive = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
summary(Tree_Data)
## No Plot Subplot Species
## Min. : 3 Min. : 1.000 Length:2783 Length:2783
## 1st Qu.:1971 1st Qu.: 5.000 Class :character Class :character
## Median :3932 Median :10.000 Mode :character Mode :character
## Mean :3915 Mean : 9.562
## 3rd Qu.:5879 3rd Qu.:14.000
## Max. :7772 Max. :18.000
##
## Light_ISF Light_Cat Core Soil
## Min. :0.03200 Length:2783 Min. :2016 Length:2783
## 1st Qu.:0.06600 Class :character 1st Qu.:2016 Class :character
## Median :0.08200 Mode :character Median :2017 Mode :character
## Mean :0.08571 Mean :2017
## 3rd Qu.:0.10000 3rd Qu.:2017
## Max. :0.16100 Max. :2017
##
## Adult Sterile Conspecific Myco
## Length:2783 Length:2783 Length:2783 Length:2783
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## SoilMyco PlantDate AMF EMF
## Length:2783 Length:2783 Min. : 0.00 Min. : 0.00
## Class :character Class :character 1st Qu.: 13.40 1st Qu.:13.78
## Mode :character Mode :character Median : 18.00 Median :27.72
## Mean : 20.55 Mean :26.48
## 3rd Qu.: 24.45 3rd Qu.:35.71
## Max. :100.00 Max. :87.50
## NA's :1500
## Phenolics Lignin NSC Census
## Min. :-1.350 Min. : 2.23 Min. : 4.30 Min. : 4.00
## 1st Qu.: 0.170 1st Qu.:10.36 1st Qu.:11.61 1st Qu.: 7.00
## Median : 0.750 Median :14.04 Median :12.66 Median :13.00
## Mean : 1.933 Mean :15.76 Mean :14.22 Mean :15.28
## 3rd Qu.: 3.780 3rd Qu.:21.11 3rd Qu.:17.27 3rd Qu.:18.00
## Max. : 6.100 Max. :32.77 Max. :29.45 Max. :33.00
##
## Time Event Harvest Alive
## Min. : 14.00 Min. :0.0000 Length:2783 Length:2783
## 1st Qu.: 24.50 1st Qu.:0.0000 Class :character Class :character
## Median : 45.50 Median :1.0000 Mode :character Mode :character
## Mean : 53.49 Mean :0.5705
## 3rd Qu.: 63.00 3rd Qu.:1.0000
## Max. :115.50 Max. :1.0000
## NA's :1
STANDARDISE VARIABLE NAMES
names(Tree_Data)<-str_to_title(names(Tree_Data))
names(Tree_Data)
## [1] "No" "Plot" "Subplot" "Species" "Light_isf"
## [6] "Light_cat" "Core" "Soil" "Adult" "Sterile"
## [11] "Conspecific" "Myco" "Soilmyco" "Plantdate" "Amf"
## [16] "Emf" "Phenolics" "Lignin" "Nsc" "Census"
## [21] "Time" "Event" "Harvest" "Alive"
IMPUTATION
Tree_Data$Event[is.na(Tree_Data$Event)]<-0
Tree_Data$Emf<-ifelse(is.na(Tree_Data$Emf),mean(Tree_Data$Emf,na.rm=T),Tree_Data$Emf)
NUMERIC TO STRING AND VICE VERSE
Tree_Data$Sterile<-as.factor(Tree_Data$Sterile)
Tree_Data$Myco<-as.factor(Tree_Data$Myco)
Tree_Data$Species<-as.factor(Tree_Data$Species)
Tree_Data$Subplot<-as.factor(Tree_Data$Subplot)
Tree_Data$Plot<-as.factor(Tree_Data$Plot)
Tree_Data$Conspecific<-as.factor(Tree_Data$Conspecific)
Tree_Data$Soil<-as.factor(Tree_Data$Soil)
Tree_Data$Plantdate<-mdy(Tree_Data$Plantdate)
#Tree_Data$Event<-as.factor(Tree_Data$Event)
Tree_Data$Census<-as.factor(Tree_Data$Census)
Tree_Data$Core<-as.factor(Tree_Data$Core)
Tree_Data$Adult<-as.factor(Tree_Data$Adult)
str(Tree_Data)
## spc_tbl_ [2,783 × 24] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ No : num [1:2783] 126 11 12 2823 5679 ...
## $ Plot : Factor w/ 18 levels "1","2","3","4",..: 1 1 1 7 14 1 1 1 1 1 ...
## $ Subplot : Factor w/ 5 levels "A","B","C","D",..: 3 3 3 4 1 3 1 1 1 1 ...
## $ Species : Factor w/ 4 levels "Acer saccharum",..: 1 3 4 1 1 2 3 4 4 1 ...
## $ Light_isf : num [1:2783] 0.106 0.106 0.106 0.08 0.06 0.106 0.108 0.108 0.108 0.108 ...
## $ Light_cat : chr [1:2783] "Med" "Med" "Med" "Med" ...
## $ Core : Factor w/ 2 levels "2016","2017": 2 2 2 1 2 1 1 2 1 1 ...
## $ Soil : Factor w/ 7 levels "Acer rubrum",..: 4 6 4 4 4 1 3 7 2 3 ...
## $ Adult : Factor w/ 36 levels "1027","118","1201",..: 35 31 36 36 27 10 28 16 7 23 ...
## $ Sterile : Factor w/ 2 levels "Non-Sterile",..: 1 1 1 1 1 1 1 2 1 1 ...
## $ Conspecific: Factor w/ 3 levels "Conspecific",..: 2 2 2 2 2 2 2 3 2 2 ...
## $ Myco : Factor w/ 2 levels "AMF","EMF": 1 2 2 1 1 1 2 2 2 1 ...
## $ Soilmyco : chr [1:2783] "AMF" "EMF" "AMF" "AMF" ...
## $ Plantdate : Date[1:2783], format: "2018-06-11" "2018-05-25" ...
## $ Amf : num [1:2783] 22 15.8 24.4 22.2 21.1 ...
## $ Emf : num [1:2783] 26.5 31.1 28.2 26.5 26.5 ...
## $ Phenolics : num [1:2783] -0.56 5.19 3.36 -0.71 -0.58 0.3 5.11 3.43 3.83 -0.05 ...
## $ Lignin : num [1:2783] 13.9 20.5 24.7 14.3 10.8 ...
## $ Nsc : num [1:2783] 12.2 19.3 15 12.4 11.2 ...
## $ Census : Factor w/ 22 levels "4","7","8","9",..: 1 22 13 1 1 2 2 2 22 2 ...
## $ Time : num [1:2783] 14 116 63 14 14 ...
## $ Event : num [1:2783] 1 0 1 1 1 1 0 0 0 1 ...
## $ Harvest : chr [1:2783] NA NA NA NA ...
## $ Alive : chr [1:2783] NA "X" NA NA ...
## - attr(*, "spec")=
## .. cols(
## .. No = col_double(),
## .. Plot = col_double(),
## .. Subplot = col_character(),
## .. Species = col_character(),
## .. Light_ISF = col_double(),
## .. Light_Cat = col_character(),
## .. Core = col_double(),
## .. Soil = col_character(),
## .. Adult = col_character(),
## .. Sterile = col_character(),
## .. Conspecific = col_character(),
## .. Myco = col_character(),
## .. SoilMyco = col_character(),
## .. PlantDate = col_character(),
## .. AMF = col_double(),
## .. EMF = col_double(),
## .. Phenolics = col_double(),
## .. Lignin = col_double(),
## .. NSC = col_double(),
## .. Census = col_double(),
## .. Time = col_double(),
## .. Event = col_double(),
## .. Harvest = col_character(),
## .. Alive = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
summary(Tree_Data)
## No Plot Subplot Species Light_isf
## Min. : 3 11 : 167 A:701 Acer saccharum :751 Min. :0.03200
## 1st Qu.:1971 18 : 166 B:663 Prunus serotina:749 1st Qu.:0.06600
## Median :3932 10 : 160 C:646 Quercus alba :673 Median :0.08200
## Mean :3915 7 : 159 D:666 Quercus rubra :610 Mean :0.08571
## 3rd Qu.:5879 12 : 159 E:107 3rd Qu.:0.10000
## Max. :7772 15 : 159 Max. :0.16100
## (Other):1813
## Light_cat Core Soil Adult
## Length:2783 2016: 977 Acer rubrum :376 984 : 90
## Class :character 2017:1806 Acer saccharum :397 I : 90
## Mode :character Populus grandidentata:391 689 : 89
## Prunus serotina :413 J : 89
## Quercus alba :381 275 : 88
## Quercus rubra :402 921 : 88
## Sterile :423 (Other):2249
## Sterile Conspecific Myco Soilmyco
## Non-Sterile:2360 Conspecific : 386 AMF:1500 Length:2783
## Sterile : 423 Heterospecific:1974 EMF:1283 Class :character
## Sterilized : 423 Mode :character
##
##
##
##
## Plantdate Amf Emf Phenolics
## Min. :2018-05-10 Min. : 0.00 Min. : 0.00 Min. :-1.350
## 1st Qu.:2018-05-25 1st Qu.: 13.40 1st Qu.:26.48 1st Qu.: 0.170
## Median :2018-06-01 Median : 18.00 Median :26.48 Median : 0.750
## Mean :2018-05-31 Mean : 20.55 Mean :26.48 Mean : 1.933
## 3rd Qu.:2018-06-06 3rd Qu.: 24.45 3rd Qu.:26.48 3rd Qu.: 3.780
## Max. :2018-06-15 Max. :100.00 Max. :87.50 Max. : 6.100
##
## Lignin Nsc Census Time
## Min. : 2.23 Min. : 4.30 7 :907 Min. : 14.00
## 1st Qu.:10.36 1st Qu.:11.61 33 :493 1st Qu.: 24.50
## Median :14.04 Median :12.66 12 :267 Median : 45.50
## Mean :15.76 Mean :14.22 13 :257 Mean : 53.49
## 3rd Qu.:21.11 3rd Qu.:17.27 14 :132 3rd Qu.: 63.00
## Max. :32.77 Max. :29.45 16 :126 Max. :115.50
## (Other):601
## Event Harvest Alive
## Min. :0.0000 Length:2783 Length:2783
## 1st Qu.:0.0000 Class :character Class :character
## Median :1.0000 Mode :character Mode :character
## Mean :0.5702
## 3rd Qu.:1.0000
## Max. :1.0000
##
head(Tree_Data$Plantdate)
## [1] "2018-06-11" "2018-05-25" "2018-05-31" "2018-06-11" "2018-06-11"
## [6] "2018-06-05"
MUTATE
Add Month and Day column
Tree_Data<-Tree_Data%>%mutate(Month=month(Plantdate,label=T),Day=wday(Plantdate,label=T))
names(Tree_Data)
## [1] "No" "Plot" "Subplot" "Species" "Light_isf"
## [6] "Light_cat" "Core" "Soil" "Adult" "Sterile"
## [11] "Conspecific" "Myco" "Soilmyco" "Plantdate" "Amf"
## [16] "Emf" "Phenolics" "Lignin" "Nsc" "Census"
## [21] "Time" "Event" "Harvest" "Alive" "Month"
## [26] "Day"
survival analysis
km <- with(Tree_Data, Surv(Time,Event))
head(km,80)
## [1] 14.0 115.5+ 63.0 14.0 14.0 24.5 24.5+ 24.5+ 115.5+ 24.5
## [11] 115.5+ 73.5 73.5 59.5 24.5+ 115.5+ 24.5+ 24.5+ 24.5+ 24.5
## [21] 24.5 24.5 115.5+ 24.5+ 73.5 73.5 24.5+ 59.5 24.5+ 24.5+
## [31] 115.5+ 115.5+ 115.5+ 115.5+ 24.5+ 24.5 66.5 24.5+ 24.5+ 24.5
## [41] 24.5 24.5+ 115.5+ 24.5+ 24.5+ 24.5+ 24.5 24.5 24.5+ 70.0
## [51] 24.5+ 63.0 24.5+ 24.5+ 115.5+ 24.5+ 24.5+ 24.5+ 24.5 24.5
## [61] 24.5+ 24.5+ 24.5+ 24.5+ 24.5+ 24.5+ 24.5+ 24.5+ 24.5 24.5+
## [71] 115.5+ 24.5 24.5+ 115.5+ 24.5+ 24.5 24.5+ 24.5 59.5 24.5+
km_fit <- survfit(Surv(Time, Event) ~ 1, data=Tree_Data)
summary(km_fit, times = c(1,30,60,90*(1:10)))
## Call: survfit(formula = Surv(Time, Event) ~ 1, data = Tree_Data)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 2783 0 1.000 0.00000 1.000 1.000
## 30 1815 264 0.905 0.00560 0.894 0.916
## 60 746 1069 0.372 0.01070 0.351 0.393
## 90 494 252 0.246 0.00957 0.228 0.266
autoplot(km_fit)
km_trt_fit <- survfit(Surv(Time, Event) ~Sterile, data=Tree_Data)
autoplot(km_trt_fit)
km_AG_fit <- survfit(Surv(Time, Event) ~Myco, data=Tree_Data)
autoplot(km_AG_fit)
km_AG_fit <- survfit(Surv(Time, Event) ~Soil, data=Tree_Data)
autoplot(km_AG_fit)
cox proportional hazard model
cox <- coxph(Surv(Time, Event) ~ Phenolics+Soil+Myco,data = Tree_Data)
summary(cox)
## Call:
## coxph(formula = Surv(Time, Event) ~ Phenolics + Soil + Myco,
## data = Tree_Data)
##
## n= 2783, number of events= 1587
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Phenolics -0.13706 0.87192 0.05470 -2.506 0.012227 *
## SoilAcer saccharum 0.20774 1.23089 0.09667 2.149 0.031633 *
## SoilPopulus grandidentata 0.22829 1.25645 0.09504 2.402 0.016307 *
## SoilPrunus serotina 0.36600 1.44195 0.09527 3.842 0.000122 ***
## SoilQuercus alba 0.41148 1.50905 0.09516 4.324 1.53e-05 ***
## SoilQuercus rubra 0.30358 1.35469 0.09617 3.157 0.001596 **
## SoilSterile -0.36928 0.69123 0.09985 -3.698 0.000217 ***
## MycoEMF -1.89963 0.14962 0.21596 -8.796 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Phenolics 0.8719 1.1469 0.78327 0.9706
## SoilAcer saccharum 1.2309 0.8124 1.01844 1.4877
## SoilPopulus grandidentata 1.2565 0.7959 1.04290 1.5137
## SoilPrunus serotina 1.4420 0.6935 1.19635 1.7380
## SoilQuercus alba 1.5091 0.6627 1.25229 1.8185
## SoilQuercus rubra 1.3547 0.7382 1.12197 1.6357
## SoilSterile 0.6912 1.4467 0.56837 0.8407
## MycoEMF 0.1496 6.6834 0.09799 0.2285
##
## Concordance= 0.764 (se = 0.006 )
## Likelihood ratio test= 1567 on 8 df, p=<2e-16
## Wald test = 1172 on 8 df, p=<2e-16
## Score (logrank) test = 1508 on 8 df, p=<2e-16
#cox <- coxph(Surv(time, status) ~ trt + celltype + karno + diagtime + age + prior , data = vet)
#summary(cox)