# Patients with colon and rectal cancer diagnosed in 1994-2000
# Attach the package and load the example colorectal cancer data.
library(relsurvForest)
data(colrec)

# Preview the first rows; per the printout, `age` is in years and `agegr`
# is the categorical age group derived from it.
head(colrec)
## sex age diag time stat stage site agegr
## 1 1 62.98324 12656 16 0 1 rectum 61-68
## 2 2 33.07962 13388 504 0 3 rectum 12-60
## 3 1 66.46862 12711 22 0 3 colon 61-68
## 4 2 80.10076 13971 3998 0 1 colon 76-95
## 6 2 83.27675 12997 9 0 99 colon 76-95
## 7 1 41.49600 13814 88 0 2 colon 12-60

# Run nessie() per age group at 0, 2, ..., 20. The printed table has one row
# per age group, one column per requested time, plus a `c.exp.surv` column.
# NOTE(review): the values are presumably expected numbers of patients still
# at risk -- confirm against the nessie() documentation.
nessie.obj <- nessie(
  Surv(time, stat) ~ agegr,
  data = colrec,
  ratetable = slopop,
  times = seq(from = 0, to = 20, by = 2),
  # `rmap` maps data columns onto the rate-table dimensions; age (years) is
  # multiplied by 365.24, presumably to express it in days.
  rmap = list(age = age * 365.24, sex = sex, year = diag)
)
##
## 0 2 4 6 8 10 12 14 16 18
## agegr12-60 1473 1448.8 1421.8 1392.5 1360.7 1327.1 1291.2 1252.8 1211.6 1166.4
## agegr61-68 1500 1433.5 1361.7 1286.2 1207.0 1122.6 1033.6 940.0 844.6 743.2
## agegr69-75 1457 1343.8 1225.3 1100.9 968.7 829.8 685.1 549.5 434.0 327.0
## agegr76-95 1541 1236.6 968.2 735.5 528.2 355.5 233.9 151.4 92.3 52.1
## 20 c.exp.surv
## agegr12-60 1115.8 28.7
## agegr61-68 635.6 17.3
## agegr69-75 231.6 12.0
## agegr76-95 27.3 6.7

# Plot the nessie result by age group, with maxEventTime set to 15.
plot.nessie(nessie.obj, demVar = "agegr", maxEventTime = 15)

# Scenario 1: Net survival by age categories
# Scenario 1: grow a relative-survival forest with age group as the only
# covariate, time the fit, then predict for one patient per age group.

# Training subset used by the fit and by every prediction below: rows with
# stage code other than "99" and age below 75.
train_C1 <- colrec[colrec$stage != "99" & colrec$age < 75, ]

startTime_C1 <- Sys.time()
rs.obj_C1 <- rs.forest(
  formula = Surv(time, stat) ~ agegr,
  data = train_C1,
  ratetable = slopop,
  # Age (years) is converted with 365.24, the same factor as the nessie()
  # call earlier in this file. The previous factor 325.24 was a typo that
  # made every patient look about 11% younger in the rate-table lookup.
  rmap = list(age = age * 365.24, sex = sex, year = diag),
  ntree = 5,
  mtry = 1,
  nodesize = 200,
  splitrule = "custom",
  membership = TRUE
)
endTime_C1 <- Sys.time()

# Wall-clock time spent growing the forest.
procTime_C1 <- endTime_C1 - startTime_C1
procTime_C1
## Time difference of 1.220972 mins

# NOTE(review): the output below was recorded before the age-conversion fix;
# the error metrics are expected to differ on a fresh run.
rs.obj_C1
## Sample size: 4263
## Number of deaths: 3319
## Number of trees: 5
## Forest terminal node size: 200
## Average no. of terminal nodes: 3
## No. of variables tried at each split: 1
## Total no. of variables: 1
## Resampling used to grow trees: swor
## Resample size used to grow trees: 2694
## Analysis: RSF
## Family: relative survival
## Splitting rule: logrank-type (RS)
## Number of random split points: 10
## (OOB) CRPS: 0.20034784
## (OOB) Requested performance error: 0.44741032

# Each subgroup is the `agegr` value (column 8) of one representative row.
# Row 2 is in group 12-60 (see the head() printout).
subgroup1 <- colrec[2, "agegr"]
pred1_C1 <- predict.rsforest(
  rsforest.obj = rs.obj_C1,
  newdata = subgroup1,
  formula = Surv(time, stat) ~ 1,
  data = train_C1,
  ratetable = slopop,
  scale = 365
)

# Row 1 is in group 61-68 (see the head() printout).
subgroup2 <- colrec[1, "agegr"]
pred2_C1 <- predict.rsforest(
  rsforest.obj = rs.obj_C1,
  newdata = subgroup2,
  formula = Surv(time, stat) ~ 1,
  data = train_C1,
  ratetable = slopop,
  scale = 365
)

# NOTE(review): row 603 is presumably in group 69-75 -- confirm.
subgroup3 <- colrec[603, "agegr"]
pred3_C1 <- predict.rsforest(
  rsforest.obj = rs.obj_C1,
  newdata = subgroup3,
  formula = Surv(time, stat) ~ 1,
  data = train_C1,
  ratetable = slopop,
  scale = 365
)

# Scenario 2: Net survival by cancer stage
# Scenario 2: grow a relative-survival forest with cancer stage as the only
# covariate, time the fit, then predict for one patient per stage.

# Training subset used by the fit and by every prediction below: rows with
# stage code other than "99" and age below 75.
train_C2 <- colrec[colrec$stage != "99" & colrec$age < 75, ]

startTime_C2 <- Sys.time()
rs.obj_C2 <- rs.forest(
  formula = Surv(time, stat) ~ stage,
  data = train_C2,
  ratetable = slopop,
  # Age (years) is converted with 365.24, the same factor as the nessie()
  # call earlier in this file. The previous factor 325.24 was a typo that
  # made every patient look about 11% younger in the rate-table lookup.
  rmap = list(age = age * 365.24, sex = sex, year = diag),
  ntree = 5,
  mtry = 1,
  nodesize = 200,
  splitrule = "custom",
  membership = TRUE
)
endTime_C2 <- Sys.time()

# Wall-clock time spent growing the forest.
procTime_C2 <- endTime_C2 - startTime_C2
procTime_C2
## Time difference of 1.261088 mins

# NOTE(review): the output below was recorded before the age-conversion fix;
# the error metrics are expected to differ on a fresh run.
rs.obj_C2
## Sample size: 4263
## Number of deaths: 3319
## Number of trees: 5
## Forest terminal node size: 200
## Average no. of terminal nodes: 3
## No. of variables tried at each split: 1
## Total no. of variables: 1
## Resampling used to grow trees: swor
## Resample size used to grow trees: 2694
## Analysis: RSF
## Family: relative survival
## Splitting rule: logrank-type (RS)
## Number of random split points: 10
## (OOB) CRPS: 0.17108805
## (OOB) Requested performance error: 0.34090125

# Each subgroup is the `stage` value (column 6) of one representative row.
# Row 1 has stage 1 (see the head() printout).
subgroup1 <- colrec[1, "stage"]
pred1_C2 <- predict.rsforest(
  rsforest.obj = rs.obj_C2,
  newdata = subgroup1,
  formula = Surv(time, stat) ~ 1,
  data = train_C2,
  ratetable = slopop,
  scale = 365
)

# Row 603 has stage 2, as the printout below confirms.
subgroup2 <- colrec[603, "stage"]
subgroup2
## [1] 2
## Levels: 1 2 3 99
pred2_C2 <- predict.rsforest(
  rsforest.obj = rs.obj_C2,
  newdata = subgroup2,
  formula = Surv(time, stat) ~ 1,
  data = train_C2,
  ratetable = slopop,
  scale = 365
)

# Row 2 has stage 3 (see the head() printout).
subgroup3 <- colrec[2, "stage"]
pred3_C2 <- predict.rsforest(
  rsforest.obj = rs.obj_C2,
  newdata = subgroup3,
  formula = Surv(time, stat) ~ 1,
  data = train_C2,
  ratetable = slopop,
  scale = 365
)

# Scenario 3: Net survival by age and stage
# Scenario 3: grow a relative-survival forest on stage and age group, time
# the fit, then predict for every stage (1-3) by age group combination.

# Training subset used by the fit and by every prediction below: rows with
# stage code other than "99" and age below 75.
train_C3 <- colrec[colrec$stage != "99" & colrec$age < 75, ]

startTime_C3 <- Sys.time()
rs.obj_C3 <- rs.forest(
  formula = Surv(time, stat) ~ stage + agegr,
  data = train_C3,
  ratetable = slopop,
  # Age (years) is converted with 365.24, the same factor as the nessie()
  # call earlier in this file. The previous factor 325.24 was a typo that
  # made every patient look about 11% younger in the rate-table lookup.
  rmap = list(age = age * 365.24, sex = sex, year = diag),
  ntree = 5,
  mtry = 2,
  nodesize = 200,
  splitrule = "custom",
  membership = TRUE
)
endTime_C3 <- Sys.time()

# Wall-clock time spent growing the forest.
procTime_C3 <- endTime_C3 - startTime_C3
procTime_C3
## Time difference of 3.350219 mins

# NOTE(review): the output below was recorded before the age-conversion fix;
# the error metrics are expected to differ on a fresh run.
rs.obj_C3
## Sample size: 4263
## Number of deaths: 3319
## Number of trees: 5
## Forest terminal node size: 200
## Average no. of terminal nodes: 9
## No. of variables tried at each split: 2
## Total no. of variables: 2
## Resampling used to grow trees: swor
## Resample size used to grow trees: 2694
## Analysis: RSF
## Family: relative survival
## Splitting rule: logrank-type (RS)
## Number of random split points: 10
## (OOB) CRPS: 0.16308106
## (OOB) Requested performance error: 0.30443408

# Each `subK_stageS` below is a one-row data frame holding the `stage` and
# `agegr` columns (columns 6 and 8) of the first row in `colrec` that matches
# the requested stage / age-group combination.

#### Ages by cancer stage I ######
sub1_stage1 <- colrec[
  which(colrec$stage == "1" & colrec$agegr == "12-60")[1],
  c("stage", "agegr")
]
sub2_stage1 <- colrec[
  which(colrec$stage == "1" & colrec$agegr == "61-68")[1],
  c("stage", "agegr")
]
sub3_stage1 <- colrec[
  which(colrec$stage == "1" & colrec$agegr == "69-75")[1],
  c("stage", "agegr")
]
pred1_C3_stage1 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub1_stage1,
  formula = Surv(time, stat) ~ 1,
  data = train_C3,
  ratetable = slopop,
  scale = 365
)
pred2_C3_stage1 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub2_stage1,
  formula = Surv(time, stat) ~ 1,
  data = train_C3,
  ratetable = slopop,
  scale = 365
)
pred3_C3_stage1 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub3_stage1,
  formula = Surv(time, stat) ~ 1,
  data = train_C3,
  ratetable = slopop,
  scale = 365
)

#### Ages by cancer stage II ######
sub1_stage2 <- colrec[
  which(colrec$stage == "2" & colrec$agegr == "12-60")[1],
  c("stage", "agegr")
]
sub2_stage2 <- colrec[
  which(colrec$stage == "2" & colrec$agegr == "61-68")[1],
  c("stage", "agegr")
]
sub3_stage2 <- colrec[
  which(colrec$stage == "2" & colrec$agegr == "69-75")[1],
  c("stage", "agegr")
]
pred1_C3_stage2 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub1_stage2,
  formula = Surv(time, stat) ~ 1,
  data = train_C3,
  ratetable = slopop,
  scale = 365
)
pred2_C3_stage2 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub2_stage2,
  formula = Surv(time, stat) ~ 1,
  data = train_C3,
  ratetable = slopop,
  scale = 365
)
pred3_C3_stage2 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub3_stage2,
  formula = Surv(time, stat) ~ 1,
  data = train_C3,
  ratetable = slopop,
  scale = 365
)

#### Ages by cancer stage III ######
sub1_stage3 <- colrec[
  which(colrec$stage == "3" & colrec$agegr == "12-60")[1],
  c("stage", "agegr")
]
sub2_stage3 <- colrec[
  which(colrec$stage == "3" & colrec$agegr == "61-68")[1],
  c("stage", "agegr")
]
sub3_stage3 <- colrec[
  which(colrec$stage == "3" & colrec$agegr == "69-75")[1],
  c("stage", "agegr")
]
pred1_C3_stage3 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub1_stage3,
  formula = Surv(time, stat) ~ 1,
  data = train_C3,
  ratetable = slopop,
  scale = 365
)
pred2_C3_stage3 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub2_stage3,
  formula = Surv(time, stat) ~ 1,
  data = train_C3,
  ratetable = slopop,
  scale = 365
)
pred3_C3_stage3 <- predict.rsforest(
  rsforest.obj = rs.obj_C3,
  newdata = sub3_stage3,
  formula = Surv(time, stat) ~ 1,
  data = train_C3,
  ratetable = slopop,
  scale = 365
)

# Scenario 4: Net survival by age and stage for colon and rectal cancer
# Scenario 4: grow a relative-survival forest on stage, age group and tumour
# site (colon vs. rectum), and time the fit.
startTime_C4 <- Sys.time()
rs.obj_C4 <- rs.forest(
  formula = Surv(time, stat) ~ stage + agegr + site,
  # Training subset: rows with stage code other than "99" and age below 75.
  data = colrec[colrec$stage != "99" & colrec$age < 75, ],
  ratetable = slopop,
  # Age (years) is converted with 365.24, the same factor as the nessie()
  # call earlier in this file. The previous factor 325.24 was a typo that
  # made every patient look about 11% younger in the rate-table lookup.
  rmap = list(age = age * 365.24, sex = sex, year = diag),
  ntree = 5,
  mtry = 2,
  nodesize = 100,
  splitrule = "custom",
  membership = TRUE
)
endTime_C4 <- Sys.time()

# Wall-clock time spent growing the forest.
procTime_C4 <- endTime_C4 - startTime_C4
procTime_C4
## Time difference of 3.29742 mins

# NOTE(review): the output below was recorded before the age-conversion fix;
# the error metrics are expected to differ on a fresh run.
rs.obj_C4
## Sample size: 4263
## Number of deaths: 3319
## Number of trees: 5
## Forest terminal node size: 100
## Average no. of terminal nodes: 14.8
## No. of variables tried at each split: 2
## Total no. of variables: 3
## Resampling used to grow trees: swor
## Resample size used to grow trees: 2694
## Analysis: RSF
## Family: relative survival
## Splitting rule: logrank-type (RS)
## Number of random split points: 10
## (OOB) CRPS: 0.16087096
## (OOB) Requested performance error: 0.30366507

