Data sets

# Preview the first rows of `rdata` (head() shows six rows by default).
head(rdata)
##   time cens age sex year agegr
## 1 2657    1  68   2 8210 62-70
## 2 1097    1  63   2 8278 62-70
## 3 3764    1  60   1 8254 54-61
## 4 3724    1  66   2 8054 62-70
## 5 5076    0  57   2 8224 54-61
## 6  139    1  57   2 8233 54-61
# Preview the first rows of `colrec` (head() shows six rows by default).
head(colrec)
##   sex   age  diag time cens stage   site   agecat
## 1   1 23004 12656   16    0     I rectum [60, 68)
## 2   2 12082 13388  504    0   III rectum [12, 60)
## 3   1 24277 12711   22    0   III  colon [60, 68)
## 4   2 29256 13971 3998    0     I  colon [75, 95)
## 6   2 30416 12997    9    0  <NA>  colon [75, 95)
## 7   1 15156 13814   88    0    II  colon [12, 60)
# Preview the first rows of `lung12boost`; the data frame is wide, so the
# printed columns wrap over several panels.
head(lung12boost)
##   sex agediag dep gor typeKC dead survtday CM_HES_6_108_cm1 CM_HES_6_108_cm2
## 1   1      76   1   H  NSCLC    0     1252                0                0
## 2   1      81   3   H  NSCLC    1      220                0                0
## 3   1      64   3   H  NSCLC    1      146                0                0
## 4   2      73   3   H  NSCLC    1      377                0                0
## 5   2      73   5   H  NSCLC    1       47                0                0
## 6   2      81   2   H  NSCLC    1       21                0                0
##   CM_HES_6_108_cm3 CM_HES_6_108_cm4 CM_HES_6_108_cm5 CM_HES_6_108_cm6
## 1                0                0                0                0
## 2                0                0                0                0
## 3                0                0                0                0
## 4                0                0                0                0
## 5                0                0                0                0
## 6                0                0                0                1
##   CM_HES_6_108_cm7 CM_HES_6_108_cm8 CM_HES_6_108_cm9 CM_HES_6_108_cm10
## 1                0                0                0                 0
## 2                0                0                0                 0
## 3                0                0                0                 0
## 4                0                0                0                 0
## 5                0                0                0                 0
## 6                0                0                0                 0
##   CM_HES_6_108_cm11 CM_HES_6_108_cm12 CM_HES_6_108_cm13 CM_HES_6_108_cm14
## 1                 0                 0                 0                 0
## 2                 0                 0                 0                 0
## 3                 0                 0                 0                 0
## 4                 0                 0                 0                 0
## 5                 0                 0                 0                 0
## 6                 0                 0                 0                 0
##   CM_HES_6_108_cm15 CM_HES_6_108_cm16 CM_HES_6_108_cm17 CM_HES_6_108_cm18
## 1                 0                 0                 0                 0
## 2                 0                 0                 0                 0
## 3                 0                 0                 0                 0
## 4                 0                 0                 0                 0
## 5                 0                 0                 0                 0
## 6                 0                 0                 0                 0
##    stage   PS
## 1   I-II Good
## 2 III-IV Poor
## 3 III-IV Poor
## 4 III-IV Good
## 5 III-IV Poor
## 6 III-IV Poor

Limiting the data for relative survival analysis

library(relsurvForest)
## Carregando pacotes exigidos: ggplot2
## Carregando pacotes exigidos: gridExtra
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
## Carregando pacotes exigidos: tidyr
## 
##  relsurvForest 3.2.2 
##  
##  Type rfsrc.news() to see new features, changes, and bug fixes. 
## 
# Run nessie() per `agegr` level on a two-year grid (0, 2, ..., 20) and keep
# the result for plotting.
# `rmap` maps the data columns onto the rate-table dimensions; age is
# multiplied by 365.24 (presumably years -> days to match `slopop` -- confirm).
nessie.obj <- nessie(
  Surv(time, cens) ~ agegr,
  data = rdata,
  ratetable = slopop,
  times = seq(0, 20, by = 2),
  rmap = list(age = age * 365.24, sex = sex, year = year)
)
## 
##              0     2     4     6     8    10    12    14    16    18    20
## agegr<54   270 265.0 259.6 253.6 247.0 239.6 231.8 223.2 214.0 204.1 193.7
## agegr54-61 256 246.4 236.2 225.2 213.6 200.8 187.4 173.4 158.5 143.2 127.4
## agegr62-70 255 237.4 218.9 199.7 179.3 158.1 136.6 115.9  95.6  76.5  58.8
## agegr71-95 259 216.9 178.8 143.9 111.8  83.6  60.5  41.8  27.7  17.4  10.2
##            c.exp.surv
## agegr<54         28.9
## agegr54-61       19.7
## agegr62-70       13.5
## agegr71-95        7.9
# Plot the nessie result for the `agegr` variable.
# NOTE(review): `time.interest` and `maxEventTime` are presumably in years,
# matching the `times` grid passed to nessie() -- confirm in the package docs.
plot.nessie(nessie.obj, demVar="agegr", time.interest = 5, maxEventTime = 15)

Building a random forest for relative survival data

# Grow a relative-survival random forest on the patients younger than 70,
# using `agegr` as the only predictor and `slopop` as the population rate
# table.
# Age is rescaled with 365.24, the same factor used in the nessie() and
# predict.rsforest() calls; the previous value 325.24 was a typo that
# understated every patient's age when matching against the rate table.
# NOTE(review): printed summaries produced with the old factor may change
# slightly once the forest is refitted.
rs.obj <- rs.forest(
  formula = Surv(time, cens) ~ agegr,
  data = rdata[rdata$age < 70, ],
  ratetable = slopop,
  rmap = list(age = age * 365.24),
  ntree = 50,
  mtry = 2,
  nodesize = 100,
  splitrule = "custom",
  membership = TRUE
)
# Auto-print the fitted forest to show its summary.
rs.obj
##                          Sample size: 743
##                     Number of deaths: 318
##                      Number of trees: 50
##            Forest terminal node size: 100
##        Average no. of terminal nodes: 3
## No. of variables tried at each split: 1
##               Total no. of variables: 1
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 470
##                             Analysis: RSF
##                               Family: relative survival
##                       Splitting rule: logrank-type (RS)
##        Number of random split points: 10
##                           (OOB) CRPS: 0.17474385
##    (OOB) Requested performance error: 0.45350079

Calibration

Computing time

Net survival prediction for subgroups of interest

# Predict for a single subject (row 50 of `rdata`) from the fitted forest.
# `data` is the same under-70 subset the forest was grown on; `scale` is
# 365.24 (presumably converts days to years in the output -- confirm).
predict.rsforest(
  rs.obj,
  newdata = rdata[50, ],
  data = rdata[rdata$age < 70, ],
  scale = 365.24
)