Data sets
# Preview the first six rows of rdata.
head(rdata, n = 6L)
## time cens age sex year agegr
## 1 2657 1 68 2 8210 62-70
## 2 1097 1 63 2 8278 62-70
## 3 3764 1 60 1 8254 54-61
## 4 3724 1 66 2 8054 62-70
## 5 5076 0 57 2 8224 54-61
## 6 139 1 57 2 8233 54-61
# Preview the first six rows of colrec.
head(colrec, n = 6L)
## sex age diag time cens stage site agecat
## 1 1 23004 12656 16 0 I rectum [60, 68)
## 2 2 12082 13388 504 0 III rectum [12, 60)
## 3 1 24277 12711 22 0 III colon [60, 68)
## 4 2 29256 13971 3998 0 I colon [75, 95)
## 6 2 30416 12997 9 0 <NA> colon [75, 95)
## 7 1 15156 13814 88 0 II colon [12, 60)
# Preview the first six rows of lung12boost.
head(lung12boost, n = 6L)
## sex agediag dep gor typeKC dead survtday CM_HES_6_108_cm1 CM_HES_6_108_cm2
## 1 1 76 1 H NSCLC 0 1252 0 0
## 2 1 81 3 H NSCLC 1 220 0 0
## 3 1 64 3 H NSCLC 1 146 0 0
## 4 2 73 3 H NSCLC 1 377 0 0
## 5 2 73 5 H NSCLC 1 47 0 0
## 6 2 81 2 H NSCLC 1 21 0 0
## CM_HES_6_108_cm3 CM_HES_6_108_cm4 CM_HES_6_108_cm5 CM_HES_6_108_cm6
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 1
## CM_HES_6_108_cm7 CM_HES_6_108_cm8 CM_HES_6_108_cm9 CM_HES_6_108_cm10
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## CM_HES_6_108_cm11 CM_HES_6_108_cm12 CM_HES_6_108_cm13 CM_HES_6_108_cm14
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## CM_HES_6_108_cm15 CM_HES_6_108_cm16 CM_HES_6_108_cm17 CM_HES_6_108_cm18
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## stage PS
## 1 I-II Good
## 2 III-IV Poor
## 3 III-IV Poor
## 4 III-IV Good
## 5 III-IV Poor
## 6 III-IV Poor
Limiting the data for relative survival analysis
library(relsurvForest)
## Carregando pacotes exigidos: ggplot2
## Carregando pacotes exigidos: gridExtra
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## Carregando pacotes exigidos: tidyr
##
## relsurvForest 3.2.2
##
## Type rfsrc.news() to see new features, changes, and bug fixes.
##
# Run nessie() on rdata, stratified by age group, against the slopop rate
# table, evaluated at times 0, 2, ..., 20.
# rmap links the data columns to the rate-table dimensions; age is multiplied
# by 365.24 (presumably converting years to days -- confirm against the rate
# table's units).
nessie.obj <- nessie(
  Surv(time, cens) ~ agegr,
  data = rdata,
  ratetable = slopop,
  times = seq(from = 0, to = 20, by = 2),
  rmap = list(
    age = age * 365.24,
    sex = sex,
    year = year
  )
)
##
## 0 2 4 6 8 10 12 14 16 18 20
## agegr<54 270 265.0 259.6 253.6 247.0 239.6 231.8 223.2 214.0 204.1 193.7
## agegr54-61 256 246.4 236.2 225.2 213.6 200.8 187.4 173.4 158.5 143.2 127.4
## agegr62-70 255 237.4 218.9 199.7 179.3 158.1 136.6 115.9 95.6 76.5 58.8
## agegr71-95 259 216.9 178.8 143.9 111.8 83.6 60.5 41.8 27.7 17.4 10.2
## c.exp.surv
## agegr<54 28.9
## agegr54-61 19.7
## agegr62-70 13.5
## agegr71-95 7.9
# Plot the nessie result by age group, with the time of interest set to 5
# and the maximum event time set to 15.
plot.nessie(
  nessie.obj,
  demVar = "agegr",
  time.interest = 5,
  maxEventTime = 15
)

Building a random forest for relative survival data
# Grow a relative-survival random forest on the subjects younger than 70,
# using age group as the only predictor and slopop as the rate table.
#
# Age is multiplied by 365.24, the same factor used in the nessie() call and
# passed as `scale` to predict.rsforest() (presumably years -> days). The
# previous factor of 325.24 was a typo that mismatched ages against the rate
# table.
# NOTE(review): any printed summary that follows was produced with the old
# factor and should be regenerated.
rs.obj <- rs.forest(
  formula = Surv(time, cens) ~ agegr,
  data = rdata[rdata$age < 70, ],
  ratetable = slopop,
  rmap = list(age = age * 365.24),
  ntree = 50,
  # NOTE(review): only one predictor is supplied, so mtry = 2 exceeds the
  # number of available variables -- confirm how the package handles this.
  mtry = 2,
  nodesize = 100,
  splitrule = "custom",
  membership = TRUE
)
# Print the fitted forest summary.
rs.obj
## Sample size: 743
## Number of deaths: 318
## Number of trees: 50
## Forest terminal node size: 100
## Average no. of terminal nodes: 3
## No. of variables tried at each split: 1
## Total no. of variables: 1
## Resampling used to grow trees: swor
## Resample size used to grow trees: 470
## Analysis: RSF
## Family: relative survival
## Splitting rule: logrank-type (RS)
## Number of random split points: 10
## (OOB) CRPS: 0.17474385
## (OOB) Requested performance error: 0.45350079
Calibration
Computing time
Net survival prediction for subgroups of interest
# Predict from the fitted forest for a single subject (row 50 of rdata).
# `data` is the same age < 70 subset the forest was grown on, and `scale`
# uses the same 365.24 factor as the nessie() call (presumably days per year).
predict.rsforest(
  rs.obj,
  newdata = rdata[50, ],
  data = rdata[rdata$age < 70, ],
  scale = 365.24
)
