library("TH.data")
library("tidyverse")
library("randomForest")
library("survival")
library("randomForestSRC")
data("GBSG2")
GBSG2%>%head(.,2)
## horTh age menostat tsize tgrade pnodes progrec estrec time cens
## 1 no 70 Post 21 II 3 48 66 1814 1
## 2 yes 56 Post 12 II 7 61 77 2018 1
Using 10 random variables.
set.seed(2000)
#breast10
n10<- as.data.frame(replicate(10,matrix(runif(10), nrow = 686)))
breast10 <- cbind(GBSG2,n10)
breast10%>%str(.)
## 'data.frame': 686 obs. of 20 variables:
## $ horTh : Factor w/ 2 levels "no","yes": 1 2 2 2 1 1 2 1 1 1 ...
## $ age : int 70 56 58 59 73 32 59 65 80 66 ...
## $ menostat: Factor w/ 2 levels "Pre","Post": 2 2 2 2 2 1 2 2 2 2 ...
## $ tsize : int 21 12 35 17 35 57 8 16 39 18 ...
## $ tgrade : Ord.factor w/ 3 levels "I"<"II"<"III": 2 2 2 2 2 3 2 2 2 2 ...
## $ pnodes : int 3 7 9 4 1 24 2 1 30 7 ...
## $ progrec : int 48 61 52 60 26 0 181 192 0 0 ...
## $ estrec : int 66 77 271 29 65 13 0 25 59 3 ...
## $ time : int 1814 2018 712 1807 772 448 2172 2161 471 2014 ...
## $ cens : int 1 1 1 1 1 1 0 0 1 0 ...
## $ V1 : num 0.197 0.716 0.362 0.391 0.813 ...
## $ V2 : num 0.1197 0.1424 0.0263 0.0731 0.9819 ...
## $ V3 : num 0.971 0.91 0.798 0.718 0.911 ...
## $ V4 : num 0.861 0.171 0.121 0.915 0.576 ...
## $ V5 : num 0.6755 0.0697 0.0388 0.2222 0.6726 ...
## $ V6 : num 0.476 0.6018 0.0711 0.5739 0.0445 ...
## $ V7 : num 0.147 0.698 0.281 0.141 0.208 ...
## $ V8 : num 0.346 0.344 0.756 0.726 0.705 ...
## $ V9 : num 0.498 0.776 0.112 0.997 0.671 ...
## $ V10 : num 0.806 0.692 0.83 0.664 0.299 ...
#train/ test
n = nrow(breast10)
split = sample(c(TRUE, FALSE), n, replace=TRUE, prob=c(0.75, 0.25))
training = breast10[split, ]
testing = breast10[!split, ]
#Fitting randomForest
breast10.rf <- randomForest(time ~ ., data=training,keep.forest=FALSE, importance=TRUE)
#fitting cox model
surv.object <- Surv(breast10$time, breast10$cens)
fit.coxph <- coxph(surv.object~ ., data = breast10)
#c-index
sum.surv <- fit.coxph %>% summary(.)
c_index <- sum.surv$concordance %>% print(.)
## C se(C)
## 1.000000e+00 3.783583e-18
set.seed(2000)
n50<- as.data.frame(replicate(50,matrix(runif(50), nrow = 686)))
breast50 <- cbind(GBSG2,n50)
#train/ test
n = nrow(breast50)
split = sample(c(TRUE, FALSE), n, replace=TRUE, prob=c(0.75, 0.25))
training = breast50[split, ]
testing = breast50[!split, ]
#Fitting randomForest
breast50.rf <- randomForest(time ~ ., data=training,keep.forest=FALSE, importance=TRUE)
#fitting cox model
surv.object <- Surv(breast50$time, breast50$cens)
fit.coxph <- coxph(surv.object~ ., data = breast50)
#c-index
sum.surv <- fit.coxph %>% summary(.)
c_index <- sum.surv$concordance %>% print(.)
## C se(C)
## 9.999850e-01 1.497032e-05
set.seed(2000)
n100<- as.data.frame(replicate(100,matrix(runif(100), nrow = 686)))
breast100 <- cbind(GBSG2,n100)
#train/ test
n = nrow(breast100)
split = sample(c(TRUE, FALSE), n, replace=TRUE, prob=c(0.75, 0.25))
training = breast100[split, ]
testing = breast100[!split, ]
#Fitting randomForest
breast100.rf <- randomForest(time ~ ., data=training,keep.forest=FALSE, importance=TRUE)
#fitting cox model
surv.object <- Surv(breast100$time, breast100$cens)
fit.coxph <- coxph(surv.object~ ., data = breast100)
#c-index
sum.surv <- fit.coxph %>% summary(.)
c_index <- sum.surv$concordance %>% print(.)
## C se(C)
## 9.999850e-01 1.493809e-05
for (x in 1:5) {
n10<- as.data.frame(replicate(10,matrix(runif(10), nrow = 686)))
breast10 <- cbind(GBSG2,n10)
n = nrow(breast10)
split = sample(c(TRUE, FALSE), n, replace=TRUE, prob=c(0.75, 0.25))
training = breast10[split, ]
testing = breast10[!split, ]
breast10.rf <- randomForest(time ~ ., data=training,keep.forest=FALSE, importance=TRUE)
surv.object <- Surv(breast10$time, breast10$cens)
fit.coxph <- coxph(surv.object~ ., data = breast10)
sum.surv <- summary(fit.coxph)
c_index <- sum.surv$concordance
c_index %>% print(.)
}
## C se(C)
## 1.000000e+00 3.783583e-18
## C se(C)
## 1.000000e+00 3.783583e-18
## C se(C)
## 1.000000e+00 3.783583e-18
## C se(C)
## 1.000000e+00 3.783583e-18
## C se(C)
## 1.000000e+00 3.783583e-18