Patients with colon and rectal cancer diagnosed in 1994-2000

library(relsurvForest)
# Load the colrec data set and preview its first rows
# (the recorded preview follows).
data("colrec")
head(colrec)
##   sex      age  diag time stat stage   site agegr
## 1   1 62.98324 12656   16    0     1 rectum 61-68
## 2   2 33.07962 13388  504    0     3 rectum 12-60
## 3   1 66.46862 12711   22    0     3  colon 61-68
## 4   2 80.10076 13971 3998    0     1  colon 76-95
## 6   2 83.27675 12997    9    0    99  colon 76-95
## 7   1 41.49600 13814   88    0     2  colon 12-60
# Run nessie() per age group at times 0, 2, ..., 20 (the recorded table
# follows). colrec$age is stored in years (see the head() preview), so it is
# multiplied by 365.24 before being mapped onto the rate table's age axis.
nessie.obj <- nessie(
  Surv(time, stat) ~ agegr,
  data = colrec,
  ratetable = slopop,
  times = seq(from = 0, to = 20, by = 2),
  rmap = list(age = age * 365.24, sex = sex, year = diag)
)
## 
##               0      2      4      6      8     10     12     14     16     18
## agegr12-60 1473 1448.8 1421.8 1392.5 1360.7 1327.1 1291.2 1252.8 1211.6 1166.4
## agegr61-68 1500 1433.5 1361.7 1286.2 1207.0 1122.6 1033.6  940.0  844.6  743.2
## agegr69-75 1457 1343.8 1225.3 1100.9  968.7  829.8  685.1  549.5  434.0  327.0
## agegr76-95 1541 1236.6  968.2  735.5  528.2  355.5  233.9  151.4   92.3   52.1
##                20 c.exp.surv
## agegr12-60 1115.8       28.7
## agegr61-68  635.6       17.3
## agegr69-75  231.6       12.0
## agegr76-95   27.3        6.7
# Plot the nessie result for the age-group variable (maxEventTime = 15).
plot.nessie(
  nessie.obj,
  demVar = "agegr",
  maxEventTime = 15
)

Scenario 1: Net survival by age categories

# Scenario 1 fit: forest with age group as the only covariate, grown on the
# rows of colrec with stage != "99" and age < 75. The wall-clock fitting time
# is kept in procTime_C1 and printed.
startTime_C1 <- Sys.time()
rs.obj_C1 <- rs.forest(
  formula = Surv(time, stat) ~ agegr,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  # colrec$age is in years; convert it with the same 365.24 factor used in
  # the nessie() call earlier in this file. The previous factor (325.24) was
  # a typo that understated every patient's age on the rate table.
  # NOTE(review): the recorded output that follows this chunk predates the
  # correction and may differ slightly on re-run.
  rmap = list(age = age * 365.24, sex = sex, year = diag),
  ntree = 5,
  mtry = 1,
  nodesize = 200,
  splitrule = "custom",
  membership = TRUE
)
endTime_C1 <- Sys.time()

procTime_C1 <- endTime_C1 - startTime_C1

procTime_C1
## Time difference of 1.220972 mins
rs.obj_C1
##                          Sample size: 4263
##                     Number of deaths: 3319
##                      Number of trees: 5
##            Forest terminal node size: 200
##        Average no. of terminal nodes: 3
## No. of variables tried at each split: 1
##               Total no. of variables: 1
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 2694
##                             Analysis: RSF
##                               Family: relative survival
##                       Splitting rule: logrank-type (RS)
##        Number of random split points: 10
##                           (OOB) CRPS: 0.20034784
##    (OOB) Requested performance error: 0.44741032
# Scenario 1 predictions: three calls, each using the agegr value of a single
# colrec row as newdata (agegr is column 8 of colrec). Every call passes the
# same filtered data (stage != "99", age < 75) that the forest was grown on.

# agegr of row 2 ("12-60" in the head() preview).
subgroup1 <- colrec[2, "agegr"]

pred1_C1 <- predict.rsforest(
  rsforest.obj = rs.obj_C1,
  newdata = subgroup1,
  formula = Surv(time, stat) ~ 1,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  scale = 365
)

# agegr of row 1 ("61-68" in the head() preview).
subgroup2 <- colrec[1, "agegr"]

pred2_C1 <- predict.rsforest(
  rsforest.obj = rs.obj_C1,
  newdata = subgroup2,
  formula = Surv(time, stat) ~ 1,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  scale = 365
)

# agegr of row 603.
subgroup3 <- colrec[603, "agegr"]

pred3_C1 <- predict.rsforest(
  rsforest.obj = rs.obj_C1,
  newdata = subgroup3,
  formula = Surv(time, stat) ~ 1,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  scale = 365
)

Scenario 2: Net survival by cancer stage

# Scenario 2 fit: forest with cancer stage as the only covariate, grown on the
# rows of colrec with stage != "99" and age < 75. The wall-clock fitting time
# is kept in procTime_C2 and printed.
startTime_C2 <- Sys.time()
rs.obj_C2 <- rs.forest(
  formula = Surv(time, stat) ~ stage,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  # colrec$age is in years; convert it with the same 365.24 factor used in
  # the nessie() call earlier in this file. The previous factor (325.24) was
  # a typo that understated every patient's age on the rate table.
  # NOTE(review): the recorded output that follows this chunk predates the
  # correction and may differ slightly on re-run.
  rmap = list(age = age * 365.24, sex = sex, year = diag),
  ntree = 5,
  mtry = 1,
  nodesize = 200,
  splitrule = "custom",
  membership = TRUE
)
endTime_C2 <- Sys.time()

procTime_C2 <- endTime_C2 - startTime_C2

procTime_C2
## Time difference of 1.261088 mins
rs.obj_C2
##                          Sample size: 4263
##                     Number of deaths: 3319
##                      Number of trees: 5
##            Forest terminal node size: 200
##        Average no. of terminal nodes: 3
## No. of variables tried at each split: 1
##               Total no. of variables: 1
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 2694
##                             Analysis: RSF
##                               Family: relative survival
##                       Splitting rule: logrank-type (RS)
##        Number of random split points: 10
##                           (OOB) CRPS: 0.17108805
##    (OOB) Requested performance error: 0.34090125
# Scenario 2 predictions: three calls, each using the stage value of a single
# colrec row as newdata (stage is column 6 of colrec). Every call passes the
# same filtered data (stage != "99", age < 75) that the forest was grown on.

# stage of row 1 (stage 1 in the head() preview).
subgroup1 <- colrec[1, "stage"]

pred1_C2 <- predict.rsforest(
  rsforest.obj = rs.obj_C2,
  newdata = subgroup1,
  formula = Surv(time, stat) ~ 1,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  scale = 365
)

# stage of row 603; printed to confirm which level it is.
subgroup2 <- colrec[603, "stage"]

subgroup2
## [1] 2
## Levels: 1 2 3 99
pred2_C2 <- predict.rsforest(
  rsforest.obj = rs.obj_C2,
  newdata = subgroup2,
  formula = Surv(time, stat) ~ 1,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  scale = 365
)

# stage of row 2 (stage 3 in the head() preview).
subgroup3 <- colrec[2, "stage"]

pred3_C2 <- predict.rsforest(
  rsforest.obj = rs.obj_C2,
  newdata = subgroup3,
  formula = Surv(time, stat) ~ 1,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  scale = 365
)

Scenario 3: Net survival by age and stage

# Scenario 3 fit: forest with cancer stage and age group as covariates, grown
# on the rows of colrec with stage != "99" and age < 75. The wall-clock
# fitting time is kept in procTime_C3 and printed.
startTime_C3 <- Sys.time()
rs.obj_C3 <- rs.forest(
  formula = Surv(time, stat) ~ stage + agegr,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  # colrec$age is in years; convert it with the same 365.24 factor used in
  # the nessie() call earlier in this file. The previous factor (325.24) was
  # a typo that understated every patient's age on the rate table.
  # NOTE(review): the recorded output that follows this chunk predates the
  # correction and may differ slightly on re-run.
  rmap = list(age = age * 365.24, sex = sex, year = diag),
  ntree = 5,
  mtry = 2,
  nodesize = 200,
  splitrule = "custom",
  membership = TRUE
)
endTime_C3 <- Sys.time()

procTime_C3 <- endTime_C3 - startTime_C3

procTime_C3
## Time difference of 3.350219 mins
rs.obj_C3
##                          Sample size: 4263
##                     Number of deaths: 3319
##                      Number of trees: 5
##            Forest terminal node size: 200
##        Average no. of terminal nodes: 9
## No. of variables tried at each split: 2
##               Total no. of variables: 2
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 2694
##                             Analysis: RSF
##                               Family: relative survival
##                       Splitting rule: logrank-type (RS)
##        Number of random split points: 10
##                           (OOB) CRPS: 0.16308106
##    (OOB) Requested performance error: 0.30443408
#### Stage I: one prediction per age group ####
# Each sub*_stage1 is the first colrec row with stage "1" and the given age
# group, reduced to the stage and agegr columns (columns 6 and 8).
sub1_stage1 <- colrec[
  which(colrec$stage == "1" & colrec$agegr == "12-60")[1],
  c("stage", "agegr")
]
sub2_stage1 <- colrec[
  which(colrec$stage == "1" & colrec$agegr == "61-68")[1],
  c("stage", "agegr")
]
sub3_stage1 <- colrec[
  which(colrec$stage == "1" & colrec$agegr == "69-75")[1],
  c("stage", "agegr")
]

pred1_C3_stage1 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub1_stage1,
  formula = Surv(time, stat) ~ 1,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  scale = 365
)

pred2_C3_stage1 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub2_stage1,
  formula = Surv(time, stat) ~ 1,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  scale = 365
)

pred3_C3_stage1 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub3_stage1,
  formula = Surv(time, stat) ~ 1,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  scale = 365
)


#### Stage II: one prediction per age group ####
# Each sub*_stage2 is the first colrec row with stage "2" and the given age
# group, reduced to the stage and agegr columns (columns 6 and 8).
sub1_stage2 <- colrec[
  which(colrec$stage == "2" & colrec$agegr == "12-60")[1],
  c("stage", "agegr")
]
sub2_stage2 <- colrec[
  which(colrec$stage == "2" & colrec$agegr == "61-68")[1],
  c("stage", "agegr")
]
sub3_stage2 <- colrec[
  which(colrec$stage == "2" & colrec$agegr == "69-75")[1],
  c("stage", "agegr")
]

pred1_C3_stage2 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub1_stage2,
  formula = Surv(time, stat) ~ 1,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  scale = 365
)

pred2_C3_stage2 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub2_stage2,
  formula = Surv(time, stat) ~ 1,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  scale = 365
)

pred3_C3_stage2 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub3_stage2,
  formula = Surv(time, stat) ~ 1,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  scale = 365
)
 

#### Stage III: one prediction per age group ####
# Each sub*_stage3 is the first colrec row with stage "3" and the given age
# group, reduced to the stage and agegr columns (columns 6 and 8).
sub1_stage3 <- colrec[
  which(colrec$stage == "3" & colrec$agegr == "12-60")[1],
  c("stage", "agegr")
]
sub2_stage3 <- colrec[
  which(colrec$stage == "3" & colrec$agegr == "61-68")[1],
  c("stage", "agegr")
]
sub3_stage3 <- colrec[
  which(colrec$stage == "3" & colrec$agegr == "69-75")[1],
  c("stage", "agegr")
]

pred1_C3_stage3 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub1_stage3,
  formula = Surv(time, stat) ~ 1,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  scale = 365
)

pred2_C3_stage3 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub2_stage3,
  formula = Surv(time, stat) ~ 1,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  scale = 365
)

pred3_C3_stage3 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub3_stage3,
  formula = Surv(time, stat) ~ 1,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  scale = 365
)

Scenario 4: Net survival by age and stage for colon and rectal cancer

# Scenario 4 fit: forest with cancer stage, age group and tumour site as
# covariates, grown on the rows of colrec with stage != "99" and age < 75.
# The wall-clock fitting time is kept in procTime_C4 and printed.
startTime_C4 <- Sys.time()
rs.obj_C4 <- rs.forest(
  formula = Surv(time, stat) ~ stage + agegr + site,
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  # colrec$age is in years; convert it with the same 365.24 factor used in
  # the nessie() call earlier in this file. The previous factor (325.24) was
  # a typo that understated every patient's age on the rate table.
  # NOTE(review): the recorded output that follows this chunk predates the
  # correction and may differ slightly on re-run.
  rmap = list(age = age * 365.24, sex = sex, year = diag),
  ntree = 5,
  mtry = 2,
  nodesize = 100,
  splitrule = "custom",
  membership = TRUE
)
endTime_C4 <- Sys.time()

procTime_C4 <- endTime_C4 - startTime_C4

procTime_C4
## Time difference of 3.29742 mins
rs.obj_C4
##                          Sample size: 4263
##                     Number of deaths: 3319
##                      Number of trees: 5
##            Forest terminal node size: 100
##        Average no. of terminal nodes: 14.8
## No. of variables tried at each split: 2
##               Total no. of variables: 3
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 2694
##                             Analysis: RSF
##                               Family: relative survival
##                       Splitting rule: logrank-type (RS)
##        Number of random split points: 10
##                           (OOB) CRPS: 0.16087096
##    (OOB) Requested performance error: 0.30366507