1. Required packages.

library(survival)
## Warning: package 'survival' was built under R version 4.4.3
library(survminer)
## Warning: package 'survminer' was built under R version 4.4.3
## Cargando paquete requerido: ggplot2
## Cargando paquete requerido: ggpubr
## Warning: package 'ggpubr' was built under R version 4.4.2
## 
## Adjuntando el paquete: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma
library(naniar)
## Warning: package 'naniar' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(MASS)
## 
## Adjuntando el paquete: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(rms)
## Warning: package 'rms' was built under R version 4.4.3
## Cargando paquete requerido: Hmisc
## Warning: package 'Hmisc' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(ggpubr)
library(sjPlot)
## Warning: package 'sjPlot' was built under R version 4.4.3
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'psych'
## The following object is masked from 'package:Hmisc':
## 
##     describe
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(ggsurvfit)
## Warning: package 'ggsurvfit' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'ggsurvfit'
## The following object is masked from 'package:psych':
## 
##     %+%
library(tidycmprsk) 
## Warning: package 'tidycmprsk' was built under R version 4.4.3
library(riskRegression)
## Warning: package 'riskRegression' was built under R version 4.4.3
## Registered S3 method overwritten by 'riskRegression':
##   method        from 
##   nobs.multinom broom
## riskRegression version 2025.05.20
library(prodlim) 
## Warning: package 'prodlim' was built under R version 4.4.3
library(scales)
## 
## Adjuntando el paquete: 'scales'
## The following objects are masked from 'package:psych':
## 
##     alpha, rescale

2. Load the dataset.

data <- read.csv("drugs.csv", stringsAsFactors = FALSE)

3. Explore observations with missing data.

vis_miss(data)
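
As a complement to the plot, naniar (already loaded) can also summarise missingness numerically; a small sketch:

miss_var_summary(data) # number and percentage of missing values per variable
n_miss(data)           # total number of missing cells in the data frame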

4. Convert categorical variables to labeled factors.

data$hercoc <- factor(data$hercoc,
                      levels = c(1, 2, 3, 4),
                      labels = c("Heroína + Oxicodona",
                                 "Solo Heroína",
                                 "Solo Oxicodona",
                                 "Cocaína u otras"))

data$ivhx <- factor(data$ivhx,
                    levels = c(1, 2, 3),
                    labels = c("Nunca", "Algunas veces", "Siempre"))

data$race <- factor(data$race,
                    levels = c(0, 1),
                    labels = c("Blanca", "Negra/Hispánica"))

data$treat <- factor(data$treat,
                     levels = c(1, 0),
                     labels = c("Sí", "No o aislados"))

data$fentanyl <- factor(data$fentanyl,
                        levels = c(1, 0),
                        labels = c("Sí", "No"))

data$sex <- factor(data$sex,
                   levels = c(0, 1),
                   labels = c("Mujer", "Hombre"))

5. Create the Surv object: the response variable "time to event": tiempo_evento (time, status).

tiempo_evento <- Surv(data$time,data$relapse)
data$time # follow-up time for each observation
##   [1] 188  26 207 144 551  32 459  22 210 184   5 212  87 201 260 210  84 196
##  [19]  19 441 449 659  21  53 225 161  87  89  44  37 523 226 259 289 103 624
##  [37]  68  57  65  79 559  79  87  91 297  45 246  37  37 538 541 184 122   5
##  [55] 156 121 231 111  38  15  54 127 105  11 153  11  79  46 655 166   6  95
##  [73]  83 151 220 227 343 119  43 545  47  15 721 321 167 107 491  35 123 597
##  [91] 720  13  31 228 553 190 307  73 208 267   2 169 125 655  70 398  59 122
## [109]  29   8  96 721 720  26  84 171 159   5   7 721 104 162  90 373 115  67
## [127]  30   8 168  70 130 285 569  87 310  87  30 156 658 273 168  83   4 708
## [145] 137 259 560 586 190  72 544   3 494 541  94 567  55  93 276  46   4 250
## [163] 106 552  90 203  67 559 106 374 630  61 560 150 568 490 222  56 282  35
## [181] 603 194 148 354 164  94  65 567 634 633 127 477 436 226 362 552 144 242
## [199]  56 299 167 380 120 248 218 115 224 132 148 593  26 113  32 292  89  21
## [217] 364 142 188   4  92  56 110 555 220  23 285  90  59 156 194 142  57 279
## [235] 118 567 562 239 578 551 313 560  54 198 164 325  62  45  53 253  51 540
## [253] 317 437 136 115 175 442 122 181 180  51 541 121 328   9 166 556 104 102
## [271]  50 144 545 537  67   6 207 290  20  74 100  72 152 115  92 554  92  69
## [289]  25 501  86  99  87 136 106 220  36 162 116 175 209 545 245 176  14 113
## [307] 159 354 174  23  26  98  23 555 290 543 274 536   3   5 119 164 548 175
## [325] 539 155  14 187  65 159  96 243  85   4 121 659 260 621 199 565 183 122
## [343] 170  15 268  79  23 100  98  81 546  58  39 575  17  91  57 499 123 143
## [361] 130 471  74  85  95  36  19  38 539 567 186 546  24 540 157  86 231 271
## [379]  14  75 147 105 324 538 300  73  65 568  84  22  44   7 540  21 537 186
## [397]  40 287  47  30 516 268 568 131 399  78  80 102   3 124  80  23 274  10
## [415] 459  10 176 332 119 217 285 576 106  81  47  76 348  20 306 192 216 189
## [433] 403 193  28 150  99 510 306 101 102 510  69 503  52 547 168 461 538 349
## [451]  44 548  12   6 575 589 408 232 143 582 134   7 548  81 170  29  78  81
## [469] 369  69 115 361 245 233 227  97 547 224 211 220  54 192 138 107 597 226
## [487] 434 106 180 557 556 619 546  85 233 102 548  99  36  32  78 502  71  59
## [505] 115 533  10 274 255 503 256   9 550 386 547  45  58 124  23 243 549  12
## [523]  51 562  94 204 238 140 120 154 177 119  83 130  11 159 211  33  72 161
## [541] 191 181 546 540  76   7  44 103  79 339  90 542 384 255 431 587 198 551
## [559] 110 541 242 537  56  34 567 549 133 226 401  14 548 224 540 237 354 123
## [577] 170 203 360 139 215 129 396 547 547  71 168 228 551  63  51 548 231 280
## [595] 184  86 560  46 200 244 182 296  24 142 120  47 519 248  31 567 353 458
## [613] 554 116  74  10 355 232  68  48  60  50  51 126  18  35 379 377
data$relapse
##   [1] 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1
##  [38] 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
##  [75] 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 0 0 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
## [112] 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 1 1 1 0 1 1 0 0
## [149] 1 0 0 1 1 0 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 0 0 0 1 1 1 1 1 0 1 1 1 1
## [186] 1 1 0 0 0 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1
## [223] 1 0 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
## [260] 1 1 1 0 1 1 1 1 0 1 1 0 1 0 0 0 1 1 1 1 1 1 0 1 1 1 0 1 1 1 0 1 1 1 1 1 1
## [297] 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1
## [334] 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
## [371] 1 0 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 0 1 1 1 0 1 1 1 0 1 1 1 1
## [408] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0
## [445] 1 0 1 1 0 1 1 0 1 1 0 0 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1
## [482] 1 1 1 0 1 1 1 1 0 0 0 0 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0 1 0 1 1 1
## [519] 0 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1
## [556] 0 1 0 1 0 1 0 1 1 0 0 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 0 0 1 0
## [593] 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
tiempo_evento # follow-up time, with censored observations marked by +
##   [1] 188   26  207  144  551+  32  459   22  210  184    5  212   87  201+ 260 
##  [16] 210   84  196   19  441  449  659+  21   53  225  161   87   89   44   37 
##  [31] 523+ 226  259  289  103  624+  68   57   65   79  559+  79   87   91  297 
##  [46]  45  246   37   37  538+ 541+ 184  122    5  156  121  231  111   38   15 
##  [61]  54  127  105   11  153   11   79   46  655+ 166    6   95   83  151  220 
##  [76] 227  343  119   43  545+  47   15  721+ 321  167  107  491   35  123  597+
##  [91] 720+  13   31  228  553+ 190  307   73  208  267    2  169  125  655+  70 
## [106] 398   59  122   29    8   96  721+ 720+  26   84  171  159    5    7  721+
## [121] 104  162   90  373  115   67   30    8  168   70  130  285  569+  87  310 
## [136]  87   30+ 156  658+ 273  168   83    4  708+ 137  259  560+ 586+ 190   72+
## [151] 544+   3  494  541+  94  567+  55   93  276   46    4  250  106  552+  90 
## [166] 203   67  559  106  374  630+  61  560+ 150+ 568+ 490  222   56  282   35 
## [181] 603+ 194  148  354  164   94   65  567+ 634+ 633+ 127  477  436  226  362 
## [196] 552+ 144  242   56+ 299  167  380  120  248  218  115  224  132  148  593+
## [211]  26  113   32  292   89   21  364  142  188    4   92   56  110  555+ 220 
## [226]  23  285   90   59  156  194  142   57  279  118  567+ 562+ 239  578+ 551+
## [241] 313  560+  54  198  164  325   62   45   53  253   51  540+ 317  437  136 
## [256] 115  175  442  122  181  180   51  541+ 121  328    9  166  556+ 104  102 
## [271]  50+ 144  545+ 537+  67+   6  207  290   20   74  100   72+ 152  115   92 
## [286] 554+  92   69   25  501+  86   99   87  136  106  220   36  162  116  175 
## [301] 209  545+ 245  176   14  113  159  354  174   23   26   98   23  555+ 290 
## [316] 543+ 274  536+   3    5  119  164  548+ 175  539+ 155   14  187   65  159 
## [331]  96  243   85    4  121  659  260  621+ 199  565+ 183  122  170   15  268 
## [346]  79   23  100   98   81  546+  58   39+ 575+  17   91   57  499  123  143 
## [361] 130  471   74   85   95   36   19   38  539+ 567+ 186  546+  24  540+ 157 
## [376]  86  231  271   14   75  147  105  324  538+ 300   73   65  568   84   22 
## [391]  44    7  540+  21  537+ 186   40  287   47+  30  516  268  568+ 131  399 
## [406]  78   80  102    3  124   80   23  274   10  459   10  176  332  119  217 
## [421] 285  576+ 106   81   47   76  348   20  306  192  216  189  403  193   28 
## [436] 150   99  510+ 306  101  102  510+  69  503+  52  547+ 168  461  538+ 349 
## [451]  44  548+  12    6  575+ 589+ 408  232  143  582+ 134    7  548+  81  170 
## [466]  29   78   81  369   69  115  361  245  233  227   97  547+ 224  211  220 
## [481]  54  192  138  107  597+ 226  434  106  180  557+ 556+ 619+ 546+  85  233 
## [496] 102  548+  99   36   32   78  502   71   59  115  533+  10  274  255  503+
## [511] 256    9  550+ 386  547+  45   58  124   23+ 243  549+  12   51  562+  94 
## [526] 204  238  140  120  154  177  119   83  130   11  159  211   33   72  161 
## [541] 191  181  546+ 540+  76    7   44  103   79  339   90  542+ 384  255  431 
## [556] 587+ 198  551+ 110  541+ 242  537+  56   34  567+ 549+ 133  226  401   14 
## [571] 548+ 224  540+ 237  354  123  170  203  360  139  215  129  396  547+ 547+
## [586]  71  168  228  551+  63+  51  548+ 231  280  184   86  560+  46  200  244 
## [601] 182  296   24  142  120   47  519  248   31  567+ 353  458  554+ 116   74 
## [616]  10  355  232   68   48   60   50   51  126   18   35  379  377

6a. Fit the overall Kaplan-Meier model.

km_global <- survfit(tiempo_evento ~ 1, data = data) # "~ 1" fits the crude estimate, without stratifying by any variable
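
Although km_global is only fitted here (it is not plotted), printing it is a quick check of the overall fit; a small sketch:

km_global # n, number of events, median time to relapse and its 95% CI
summary(km_global, times = c(90, 180, 365)) # relapse-free probability at selected days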

6b. Fit the Kaplan-Meier model stratified by treat.

km_fit <- survfit(tiempo_evento ~ treat, data = data) 

7. Plot the Kaplan-Meier curves stratified by treat.

ggsurvplot(km_fit, data = data, 
           risk.table = TRUE, # show the number-at-risk table
           pval = TRUE,# add the log-rank p-value
           conf.int = TRUE,# 95% confidence interval by default
           cumcensor = TRUE,# table of censored observations
           ggtheme = theme_minimal(), 
           palette = "simpsons", 
           surv.median.line = "hv")
## Warning in geom_segment(aes(x = 0, y = max(y2), xend = max(x1), yend = max(y2)), : All aesthetics have length 1, but the data has 2 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## All aesthetics have length 1, but the data has 2 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## All aesthetics have length 1, but the data has 2 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## All aesthetics have length 1, but the data has 2 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
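
To read off numerically the medians marked by surv.median.line, survminer also provides surv_median(); a small sketch:

surv_median(km_fit) # median time to relapse and 95% CI for each treat stratum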

Regarding the stratification by treat: the treated group ("Sí") accumulates fewer relapses than expected (242 observed vs. 274 expected events in the log-rank table below), so its curve lies above that of the "No o aislados" group, and the p-value shown on the plot indicates a statistically significant difference.

8. Perform the log-rank test.

logrank <- survdiff(Surv(time, relapse) ~ treat, data = data)
logrank
## Call:
## survdiff(formula = Surv(time, relapse) ~ treat, data = data)
## 
##                       N Observed Expected (O-E)^2/E (O-E)^2/V
## treat=Sí            307      242      274      3.68      8.05
## treat=No o aislados 321      266      234      4.30      8.05
## 
##  Chisq= 8  on 1 degrees of freedom, p= 0.005

9. Variable selection.

We will use an automatic method, treating this as a PREDICTIVE model in which we look for the best predictors, without requiring them to be explanatory variables or applying a classical adjustment for confounders.

  1. Fit the full model.
data$fentanyl <- relevel(data$fentanyl, ref = "Sí")
data$treat <- relevel(data$treat, ref = "No o aislados")
data$ivhx <- relevel(data$ivhx, ref = "Siempre")
cox_model_full <- coxph(tiempo_evento ~ age + beck + hercoc + ivhx + ndrugtx + race + treat + sex + fentanyl, data = data)
options(scipen = 999)
summary(cox_model_full)
## Call:
## coxph(formula = tiempo_evento ~ age + beck + hercoc + ivhx + 
##     ndrugtx + race + treat + sex + fentanyl, data = data)
## 
##   n= 628, number of events= 508 
## 
##                            coef exp(coef)  se(coef)      z   Pr(>|z|)    
## age                   -0.028899  0.971514  0.007813 -3.699   0.000216 ***
## beck                   0.008276  1.008310  0.004642  1.783   0.074622 .  
## hercocSolo Heroína     0.121804  1.129532  0.140965  0.864   0.387549    
## hercocSolo Oxicodona   0.184595  1.202732  0.149800  1.232   0.217844    
## hercocCocaína u otras  0.241643  1.273339  0.149510  1.616   0.106044    
## ivhxNunca             -0.775234  0.460596  0.161890 -4.789 0.00000168 ***
## ivhxAlgunas veces     -0.191641  0.825603  0.138775 -1.381   0.167296    
## ndrugtx                0.027666  1.028052  0.008038  3.442   0.000578 ***
## raceNegra/Hispánica   -0.186848  0.829569  0.095633 -1.954   0.050723 .  
## treatSí               -0.275760  0.758995  0.090242 -3.056   0.002245 ** 
## sexHombre              0.081460  1.084870  0.103668  0.786   0.431995    
## fentanylNo            -0.162391  0.850109  0.111200 -1.460   0.144194    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                       exp(coef) exp(-coef) lower .95 upper .95
## age                      0.9715     1.0293    0.9568    0.9865
## beck                     1.0083     0.9918    0.9992    1.0175
## hercocSolo Heroína       1.1295     0.8853    0.8569    1.4890
## hercocSolo Oxicodona     1.2027     0.8314    0.8967    1.6132
## hercocCocaína u otras    1.2733     0.7853    0.9499    1.7069
## ivhxNunca                0.4606     2.1711    0.3354    0.6326
## ivhxAlgunas veces        0.8256     1.2112    0.6290    1.0837
## ndrugtx                  1.0281     0.9727    1.0120    1.0444
## raceNegra/Hispánica      0.8296     1.2054    0.6878    1.0006
## treatSí                  0.7590     1.3175    0.6360    0.9058
## sexHombre                1.0849     0.9218    0.8854    1.3293
## fentanylNo               0.8501     1.1763    0.6836    1.0571
## 
## Concordance= 0.626  (se = 0.013 )
## Likelihood ratio test= 99.36  on 12 df,   p=0.0000000000000007
## Wald test            = 94.88  on 12 df,   p=0.000000000000006
## Score (logrank) test = 98.47  on 12 df,   p=0.000000000000001
  2. Select the variables for the final model.

👉 At each step the algorithm drops (or, with direction = "both", re-adds) the term whose removal or inclusion lowers the AIC the most.

cox_model <- stepAIC(cox_model_full, direction = "both")
## Start:  AIC=5809.88
## tiempo_evento ~ age + beck + hercoc + ivhx + ndrugtx + race + 
##     treat + sex + fentanyl
## 
##            Df    AIC
## - hercoc    3 5806.6
## - sex       1 5808.5
## <none>        5809.9
## - fentanyl  1 5810.0
## - beck      1 5811.0
## - race      1 5811.6
## - treat     1 5817.2
## - ndrugtx   1 5818.4
## - age       1 5821.8
## - ivhx      2 5830.8
## 
## Step:  AIC=5806.58
## tiempo_evento ~ age + beck + ivhx + ndrugtx + race + treat + 
##     sex + fentanyl
## 
##            Df    AIC
## - sex       1 5805.0
## <none>        5806.6
## - beck      1 5807.2
## - race      1 5808.1
## - fentanyl  1 5809.1
## + hercoc    3 5809.9
## - treat     1 5813.2
## - ndrugtx   1 5814.5
## - age       1 5819.8
## - ivhx      2 5827.4
## 
## Step:  AIC=5805.04
## tiempo_evento ~ age + beck + ivhx + ndrugtx + race + treat + 
##     fentanyl
## 
##            Df    AIC
## <none>        5805.0
## - beck      1 5805.8
## - race      1 5806.5
## + sex       1 5806.6
## - fentanyl  1 5807.3
## + hercoc    3 5808.5
## - treat     1 5811.3
## - ndrugtx   1 5813.0
## - age       1 5818.2
## - ivhx      2 5827.5
summary(cox_model)
## Call:
## coxph(formula = tiempo_evento ~ age + beck + ivhx + ndrugtx + 
##     race + treat + fentanyl, data = data)
## 
##   n= 628, number of events= 508 
## 
##                          coef exp(coef)  se(coef)      z    Pr(>|z|)    
## age                 -0.029656  0.970779  0.007700 -3.851    0.000117 ***
## beck                 0.007697  1.007727  0.004603  1.672    0.094468 .  
## ivhxNunca           -0.684679  0.504252  0.136079 -5.031 0.000000487 ***
## ivhxAlgunas veces   -0.126967  0.880763  0.118015 -1.076    0.281993    
## ndrugtx              0.027021  1.027389  0.008075  3.346    0.000819 ***
## raceNegra/Hispánica -0.179050  0.836064  0.095567 -1.874    0.060993 .  
## treatSí             -0.257510  0.772974  0.089406 -2.880    0.003974 ** 
## fentanylNo          -0.212330  0.808698  0.102388 -2.074    0.038100 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                     exp(coef) exp(-coef) lower .95 upper .95
## age                    0.9708     1.0301    0.9562    0.9855
## beck                   1.0077     0.9923    0.9987    1.0169
## ivhxNunca              0.5043     1.9831    0.3862    0.6584
## ivhxAlgunas veces      0.8808     1.1354    0.6989    1.1100
## ndrugtx                1.0274     0.9733    1.0113    1.0438
## raceNegra/Hispánica    0.8361     1.1961    0.6933    1.0083
## treatSí                0.7730     1.2937    0.6487    0.9210
## fentanylNo             0.8087     1.2366    0.6617    0.9884
## 
## Concordance= 0.624  (se = 0.013 )
## Likelihood ratio test= 96.2  on 8 df,   p=<0.0000000000000002
## Wald test            = 91.9  on 8 df,   p=<0.0000000000000002
## Score (logrank) test = 95.36  on 8 df,   p=<0.0000000000000002
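
As a quick visual complement (not part of the original output), the hazard ratios of the selected model can be displayed with survminer::ggforest(); a small sketch:

ggforest(cox_model, data = data) # forest plot of exp(coef) with 95% CIs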

10. Checking the assumptions of the final model.

Schoenfeld residuals

Schoenfeld proportional hazards test.

test_ph <- cox.zph(cox_model)
test_ph
##           chisq df     p
## age       0.177  1 0.674
## beck      5.571  1 0.018
## ivhx      0.209  2 0.901
## ndrugtx   0.794  1 0.373
## race      3.267  1 0.071
## treat     3.569  1 0.059
## fentanyl  0.752  1 0.386
## GLOBAL   14.552  8 0.068

The proportional hazards test (cox.zph) shows that beck violates the assumption (p = 0.018), race and treat are borderline (p = 0.071 and 0.059), and the remaining covariates and the global test (p = 0.068) do not reject proportionality.

Schoenfeld residuals over time, plotted per variable.

If the proportional hazards assumption holds, these residuals should fluctuate randomly around zero over time.

ggcoxzph(test_ph)
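
Since beck is the only covariate flagged by the test (p = 0.018), it can be inspected on its own. A minimal sketch with the base plot method for cox.zph objects (beck is the second row of the table above):

plot(test_ph[2]) # scaled Schoenfeld residuals over time for beck only
abline(h = coef(cox_model)["beck"], lty = 3, col = "red") # overall (time-constant) coefficient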

In the Schoenfeld residual plots, look for smoothed curves that stay roughly flat over time; a marked trend would indicate a violation of proportionality for that covariate (here, beck is the one to watch given the test above).

Log-log plots.

Log(-log) plot for the variable ivhx.

fit_ivhx <- survfit(Surv(time, relapse) ~ ivhx, data = data)

ggsurvplot(
  fit_ivhx,
  data        = data,                
  fun         = "cloglog",
  xlab        = "Tiempo (días)",
  ylab        = "log(-log(S(t)))",
  legend.title= "Uso EV",
  legend.labs = levels(data$ivhx),
  conf.int    = FALSE,
  palette     = "simpsons"
)

The lines for the ivhx categories are roughly parallel and do not cross markedly, which indicates that the proportional hazards assumption appears to hold for this variable.

🔍 Key points supporting this conclusion: the curves keep a roughly constant vertical separation and show no clear crossings during follow-up.

Log(-log) plot for the variable race.

fit_race <- survfit(Surv(time, relapse) ~ race, data = data)

ggsurvplot(
  fit_race,
  data        = data,                
  fun         = "cloglog",
  xlab        = "Tiempo (días)",
  ylab        = "log(-log(S(t)))",
  legend.title= "Race",
  legend.labs = levels(data$race),
  conf.int    = FALSE,
  palette     = "simpsons"
)

Log(-log) plot for the variable treat.

fit_treat <- survfit(Surv(time, relapse) ~ treat, data = data)

ggsurvplot(
  fit_treat,
  data        = data,                
  fun         = "cloglog",
  xlab        = "Tiempo (días)",
  ylab        = "log(-log(S(t)))",
  legend.title= "Treat",
  legend.labs = levels(data$treat),
  conf.int    = FALSE,
  palette     = "simpsons"
)

Log(-log) plot for the variable fentanyl.

fit_fentanyl <- survfit(Surv(time, relapse) ~ fentanyl, data = data)

ggsurvplot(
  fit_fentanyl,
  data        = data,                
  fun         = "cloglog",
  xlab        = "Tiempo (días)",
  ylab        = "log(-log(S(t)))",
  legend.title= "Fentanyl",
  legend.labs = levels(data$fentanyl),
  conf.int    = FALSE,
  palette     = "simpsons"
)

Martingale and deviance residuals.

Residual plots:

a.- Martingale.

ggcoxdiagnostics(cox_model, type = "martingale", linear.predictions = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

b.- Deviance.

ggcoxdiagnostics(cox_model, type = "deviance", linear.predictions = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

Overall conclusion:

11. Assessing the model's discrimination and calibration.

Discrimination.

Harrell's C-index.

summary(cox_model)$concordance
##          C      se(C) 
## 0.62380793 0.01307642

✅ What does this mean?

A C-index of 0.62 indicates that the model has a discriminative ability of 62%, that is:

In 62% of evaluable pairs, the model correctly predicts that the patient with the worse prognosis (higher predicted risk) experiences the event before the patient with the better prognosis.
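
The same statistic, with its standard error and the counts of concordant and discordant pairs, can also be obtained with survival::concordance(); a small sketch:

concordance(cox_model) # Harrell's C computed directly from the fitted Cox model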

Somers' D (Dxy) and the Q, D, U, slope and R2 metrics with the rms package.

dd <- datadist(data); options(datadist = "dd")
cox_metrics <- cph(tiempo_evento ~ age + beck + ivhx + ndrugtx + race + treat + fentanyl, 
                   data = data, x = TRUE, y = TRUE, surv = TRUE)

We validate the metrics with the bootstrap.

invisible(capture.output({ # this wrapper suppresses the printed bootstrap output
  cox_rms <- validate(cox_metrics, method = "boot", B = 200)
}))

Discrimination and calibration metrics (bootstrap-validated).

We review the validated metrics (in the index.corrected column).

cox_rms
##       index.orig training   test optimism index.corrected   n
## Dxy       0.2476   0.2546 0.2406   0.0140          0.2337 200
## R2        0.1420   0.1522 0.1329   0.0193          0.1227 200
## Slope     1.0000   1.0000 0.9268   0.0732          0.9268 200
## D         0.0162   0.0175 0.0150   0.0025          0.0137 200
## U        -0.0003  -0.0003 0.0003  -0.0006          0.0003 200
## Q         0.0165   0.0179 0.0147   0.0031          0.0134 200
## g         0.5319   0.5520 0.5067   0.0453          0.4866 200

For easier reading we can print only the column of interest and round to 3 decimals.

round(cox_rms[,5],3)
##   Dxy    R2 Slope     D     U     Q     g 
## 0.234 0.123 0.927 0.014 0.000 0.013 0.487
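
Harrell's C and Somers' Dxy are related by C = 0.5 + Dxy/2, so the optimism-corrected C can be recovered from the validated Dxy; a short sketch:

dxy_corr <- cox_rms["Dxy", "index.corrected"]
0.5 + dxy_corr / 2 # about 0.617, slightly below the apparent C of 0.624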

Let us plot the calibration curve.

Calibration. Graphical assessment.

describeBy(data$time)
## Warning in describeBy(data$time): no grouping variable requested
##    vars   n  mean     sd median trimmed    mad min max range skew kurtosis   se
## X1    1 628 223.8 193.43    159  204.08 152.71   2 721   719 0.87    -0.56 7.72
invisible(capture.output({
cal <- calibrate(cox_metrics, method = "boot", B = 200, u = 180)
}))
plot(cal, 
     subtitles = FALSE,
     lwd = 2, 
     lty = 2, 
     main = "Gráfico de Calibración",
     cex.main = 1.2,
     cex.axis = 0.7,
     cex.lab = 0.9,
     xlab = "Probabilidad Predicha de Recaída a 180 días",
     ylab = "Recaída Observada")

🔍 Interpretation:

In the stepAIC() procedure a variable such as beck (p = 0.094468 in the Wald test) can remain in the model because AIC penalizes less strictly than a p < 0.05 criterion: in the final step, dropping beck would have increased the AIC from 5805.0 to 5805.8.

cox_beck <- coxph(Surv(time, relapse) ~ beck, data = data)
data$deviance.ph <- resid(cox_beck, type = "deviance")
ggplot(data, aes(x = beck, y = deviance.ph)) +
  geom_point() +
  geom_smooth(method = "loess", se = TRUE, col = "blue", linetype = "dashed") +
  geom_hline(yintercept = 0, col = "red") +
  labs(
    title = "Residuos de Deviance vs beck",
    x = "Beck (Score de Depresión)",
    y = "Residuo Deviance"
  )
## `geom_smooth()` using formula = 'y ~ x'

The Beck Depression Inventory (BDI) is classified into severity levels according to the total score: 0-9 indicates minimal depression, 10-18 mild, 19-29 moderate and 30-63 severe depression.

The plot does not show a strictly linear relationship, which makes it worth exploring categorical codings of beck.

Since a BDI cut-off of 30 corresponds to severe depression, we create a dichotomous variable:

data <- data |> 
  mutate(beck_cat = factor(
    if_else(beck >= 30, "Severo", "No_severo"),
    levels = c("No_severo", "Severo")
  ))

We refit the model.

cox_model_cat <- coxph(tiempo_evento ~ age + beck_cat + ivhx + ndrugtx +
                         race + treat + fentanyl, data = data)
summary(cox_model_cat)
## Call:
## coxph(formula = tiempo_evento ~ age + beck_cat + ivhx + ndrugtx + 
##     race + treat + fentanyl, data = data)
## 
##   n= 628, number of events= 508 
## 
##                          coef exp(coef)  se(coef)      z    Pr(>|z|)    
## age                 -0.030341  0.970115  0.007693 -3.944 0.000080167 ***
## beck_catSevero       0.160269  1.173826  0.136719  1.172    0.241096    
## ivhxNunca           -0.695323  0.498913  0.135893 -5.117 0.000000311 ***
## ivhxAlgunas veces   -0.115837  0.890620  0.118193 -0.980    0.327053    
## ndrugtx              0.026980  1.027347  0.008078  3.340    0.000837 ***
## raceNegra/Hispánica -0.172348  0.841686  0.095724 -1.800    0.071787 .  
## treatSí             -0.262987  0.768752  0.089408 -2.941    0.003267 ** 
## fentanylNo          -0.213854  0.807466  0.102479 -2.087    0.036906 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                     exp(coef) exp(-coef) lower .95 upper .95
## age                    0.9701     1.0308    0.9556    0.9849
## beck_catSevero         1.1738     0.8519    0.8979    1.5345
## ivhxNunca              0.4989     2.0044    0.3823    0.6512
## ivhxAlgunas veces      0.8906     1.1228    0.7065    1.1228
## ndrugtx                1.0273     0.9734    1.0112    1.0437
## raceNegra/Hispánica    0.8417     1.1881    0.6977    1.0154
## treatSí                0.7688     1.3008    0.6452    0.9160
## fentanylNo             0.8075     1.2384    0.6605    0.9871
## 
## Concordance= 0.62  (se = 0.013 )
## Likelihood ratio test= 94.75  on 8 df,   p=<0.0000000000000002
## Wald test            = 90.54  on 8 df,   p=0.0000000000000004
## Score (logrank) test = 93.89  on 8 df,   p=<0.0000000000000002

In the new output (coxph with beck_cat) the results are quite consistent with the previous model.

We now create a multi-category variable:

data <- data |>
  mutate(
    beck_cat4 = case_when(
      beck >= 0  & beck <= 9  ~ "Minima",
      beck >= 10 & beck <= 18 ~ "Leve",
      beck >= 19 & beck <= 29 ~ "Moderada",
      beck >= 30              ~ "Severa"
    ),
    beck_cat4 = factor(
      beck_cat4,
      levels = c("Minima", "Leve", "Moderada", "Severa")
    )
  )
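
The same four-level coding can be written more compactly with cut(); a sketch for comparison (beck_cat4_alt is a hypothetical helper column, assuming beck takes whole-number values):

data <- data |>
  mutate(beck_cat4_alt = cut(beck,
                             breaks = c(-Inf, 9, 18, 29, Inf),
                             labels = c("Minima", "Leve", "Moderada", "Severa")))
# table(data$beck_cat4, data$beck_cat4_alt) # both codings should agree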

We refit the model.

cox_model_cat4 <- coxph(
  tiempo_evento ~ age + beck_cat4 + ivhx + ndrugtx +
    race + treat + fentanyl,
  data = data
)

summary(cox_model_cat4)
## Call:
## coxph(formula = tiempo_evento ~ age + beck_cat4 + ivhx + ndrugtx + 
##     race + treat + fentanyl, data = data)
## 
##   n= 628, number of events= 508 
## 
##                          coef exp(coef)  se(coef)      z    Pr(>|z|)    
## age                 -0.029957  0.970487  0.007716 -3.882    0.000103 ***
## beck_cat4Leve        0.059560  1.061369  0.124305  0.479    0.631839    
## beck_cat4Moderada    0.098344  1.103342  0.123511  0.796    0.425894    
## beck_cat4Severa      0.218708  1.244467  0.159781  1.369    0.171062    
## ivhxNunca           -0.687274  0.502945  0.136241 -5.045 0.000000455 ***
## ivhxAlgunas veces   -0.118976  0.887829  0.118360 -1.005    0.314798    
## ndrugtx              0.026740  1.027100  0.008091  3.305    0.000950 ***
## raceNegra/Hispánica -0.178174  0.836797  0.096034 -1.855    0.063550 .  
## treatSí             -0.261570  0.769842  0.089475 -2.923    0.003462 ** 
## fentanylNo          -0.214174  0.807208  0.102588 -2.088    0.036824 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                     exp(coef) exp(-coef) lower .95 upper .95
## age                    0.9705     1.0304    0.9559    0.9853
## beck_cat4Leve          1.0614     0.9422    0.8319    1.3542
## beck_cat4Moderada      1.1033     0.9063    0.8661    1.4055
## beck_cat4Severa        1.2445     0.8036    0.9099    1.7021
## ivhxNunca              0.5029     1.9883    0.3851    0.6569
## ivhxAlgunas veces      0.8878     1.1263    0.7040    1.1196
## ndrugtx                1.0271     0.9736    1.0109    1.0435
## raceNegra/Hispánica    0.8368     1.1950    0.6932    1.0101
## treatSí                0.7698     1.2990    0.6460    0.9174
## fentanylNo             0.8072     1.2388    0.6602    0.9870
## 
## Concordance= 0.623  (se = 0.013 )
## Likelihood ratio test= 95.39  on 10 df,   p=0.0000000000000005
## Wald test            = 91.15  on 10 df,   p=0.000000000000003
## Score (logrank) test = 94.57  on 10 df,   p=0.0000000000000007

📌 Interpretation:

Let us explore the variable beck:

data <- data %>% mutate(beck_q= ntile(beck, 5))
data %>%
  dplyr::group_by(beck_q) %>%
  dplyr::summarise(tar = mean(relapse)) %>%
  ggplot(aes(x = beck_q, y = tar)) +
  geom_point() +
  geom_smooth(method = "lm", col = "blue")
## `geom_smooth()` using formula = 'y ~ x'

data %>%
  dplyr::group_by(beck_q) %>%
  dplyr::summarise(tar = mean(relapse)) %>%
  ggplot(aes(x = beck_q, y = tar)) +
  geom_col(fill = "steelblue") +
  geom_text(aes(label = round(tar, 2)), vjust = -0.3) +
  labs(x = "Quintiles de score de Beck", y = "Proporción de recaída") +
  theme_minimal()

quantile(data$beck, probs = c(0.6))
## 60% 
##  20

Since the cut-off between the 3rd and 4th quintile (0.6 = 60%) is 20, we create a dichotomous variable with a cut-off at 20:

data <- data |>
  mutate(beck_cat2 = factor(
    if_else(beck >= 20, "Moderado_Severo", "Minimo_Leve"),
    levels = c("Moderado_Severo", "Minimo_Leve")
  ))

We refit the model.

cox_model_2 <- coxph(
  tiempo_evento ~ age + beck_cat2 + ivhx + ndrugtx + 
    race + treat + fentanyl,
  data = data
)

summary(cox_model_2)
## Call:
## coxph(formula = tiempo_evento ~ age + beck_cat2 + ivhx + ndrugtx + 
##     race + treat + fentanyl, data = data)
## 
##   n= 628, number of events= 508 
## 
##                           coef exp(coef)  se(coef)      z    Pr(>|z|)    
## age                  -0.029151  0.971270  0.007686 -3.792    0.000149 ***
## beck_cat2Minimo_Leve -0.184396  0.831606  0.090463 -2.038    0.041514 *  
## ivhxNunca            -0.688363  0.502398  0.135947 -5.063 0.000000412 ***
## ivhxAlgunas veces    -0.129022  0.878954  0.117937 -1.094    0.273958    
## ndrugtx               0.027202  1.027575  0.008122  3.349    0.000811 ***
## raceNegra/Hispánica  -0.180981  0.834452  0.095590 -1.893    0.058319 .  
## treatSí              -0.245730  0.782133  0.089670 -2.740    0.006137 ** 
## fentanylNo           -0.210277  0.810360  0.102304 -2.055    0.039839 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                      exp(coef) exp(-coef) lower .95 upper .95
## age                     0.9713     1.0296    0.9567    0.9860
## beck_cat2Minimo_Leve    0.8316     1.2025    0.6965    0.9929
## ivhxNunca               0.5024     1.9905    0.3849    0.6558
## ivhxAlgunas veces       0.8790     1.1377    0.6976    1.1075
## ndrugtx                 1.0276     0.9732    1.0113    1.0441
## raceNegra/Hispánica     0.8345     1.1984    0.6919    1.0064
## treatSí                 0.7821     1.2786    0.6561    0.9324
## fentanylNo              0.8104     1.2340    0.6631    0.9903
## 
## Concordance= 0.624  (se = 0.013 )
## Likelihood ratio test= 97.55  on 8 df,   p=<0.0000000000000002
## Wald test            = 93.49  on 8 df,   p=<0.0000000000000002
## Score (logrank) test = 97  on 8 df,   p=<0.0000000000000002

🔍 Main observations:

Let us explore the variable age:

data <- data %>% mutate(age_q= ntile(age, 10))
data %>%
  dplyr::group_by(age_q) %>%
  dplyr::summarise(tar = mean(relapse)) %>%
  ggplot(aes(x = age_q, y = tar)) +
  geom_point() +
  geom_smooth(method = "lm", col = "blue")
## `geom_smooth()` using formula = 'y ~ x'

data %>%
  dplyr::group_by(age_q) %>%
  dplyr::summarise(tar = mean(relapse)) %>%
  ggplot(aes(x = age_q, y = tar)) +
  geom_col(fill = "steelblue") +
  geom_text(aes(label = round(tar, 2)), vjust = -0.3) +
  labs(x = "Deciles de edad", y = "Proporción de recaída") +
  theme_minimal()

quantile(data$age, probs = c(0.9))
## 90% 
##  40

Since the cut-off between the 9th and 10th decile (0.9 = 90%) is 40, we create a dichotomous variable with a cut-off at 40:

data <- data |>
  mutate(age_cat2 = factor(
    if_else(age >= 40, "age_mas_40", "age_menos_40"),
    levels = c("age_menos_40", "age_mas_40")
  ))

We refit the model.

cox_model_3 <- coxph(
  tiempo_evento ~ age_cat2 + beck_cat2 + ivhx + ndrugtx + 
    race + treat + fentanyl,
  data = data
)
summary(cox_model_3)
## Call:
## coxph(formula = tiempo_evento ~ age_cat2 + beck_cat2 + ivhx + 
##     ndrugtx + race + treat + fentanyl, data = data)
## 
##   n= 628, number of events= 508 
## 
##                           coef exp(coef)  se(coef)      z   Pr(>|z|)    
## age_cat2age_mas_40   -0.571355  0.564760  0.143647 -3.977 0.00006965 ***
## beck_cat2Minimo_Leve -0.206617  0.813331  0.090327 -2.287    0.02217 *  
## ivhxNunca            -0.638527  0.528070  0.133187 -4.794 0.00000163 ***
## ivhxAlgunas veces    -0.135751  0.873060  0.118023 -1.150    0.25006    
## ndrugtx               0.024958  1.025272  0.007929  3.148    0.00165 ** 
## raceNegra/Hispánica  -0.162369  0.850127  0.095387 -1.702    0.08871 .  
## treatSí              -0.238766  0.787599  0.089786 -2.659    0.00783 ** 
## fentanylNo           -0.255274  0.774704  0.100879 -2.530    0.01139 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                      exp(coef) exp(-coef) lower .95 upper .95
## age_cat2age_mas_40      0.5648     1.7707    0.4262    0.7484
## beck_cat2Minimo_Leve    0.8133     1.2295    0.6814    0.9709
## ivhxNunca               0.5281     1.8937    0.4067    0.6856
## ivhxAlgunas veces       0.8731     1.1454    0.6928    1.1003
## ndrugtx                 1.0253     0.9754    1.0095    1.0413
## raceNegra/Hispánica     0.8501     1.1763    0.7052    1.0249
## treatSí                 0.7876     1.2697    0.6605    0.9391
## fentanylNo              0.7747     1.2908    0.6357    0.9441
## 
## Concordance= 0.627  (se = 0.013 )
## Likelihood ratio test= 101  on 8 df,   p=<0.0000000000000002
## Wald test            = 96.17  on 8 df,   p=<0.0000000000000002
## Score (logrank) test = 100.1  on 8 df,   p=<0.0000000000000002

Let us explore the variable ndrugtx:

data <- data %>% mutate(ndrugtx_q= ntile(ndrugtx, 5))
data %>%
  dplyr::group_by(ndrugtx_q) %>%
  dplyr::summarise(tar = mean(relapse)) %>%
  ggplot(aes(x = ndrugtx_q, y = tar)) +
  geom_point() +
  geom_smooth(method = "lm", col = "blue")
## `geom_smooth()` using formula = 'y ~ x'

data %>%
  dplyr::group_by(ndrugtx_q) %>%
  dplyr::summarise(tar = mean(relapse)) %>%
  ggplot(aes(x = ndrugtx_q, y = tar)) +
  geom_col(fill = "steelblue") +
  geom_text(aes(label = round(tar, 2)), vjust = -0.3) +
  labs(x = "Quintiles de ndrugtx", y = "Proporción de recaída") +
  theme_minimal()

quantile(data$ndrugtx, probs = c(0.6))
## 60% 
##   4

Since the cut-off between the 3rd and 4th quintile (0.6 = 60%) is 4, we create a dichotomous variable with a cut-off at 4:

data <- data |>
  mutate(ndrugtx_cat2 = factor(
    if_else(ndrugtx >= 4, "ndrugtx_mas_4", "ndrugtx_menos_4"),
    levels = c("ndrugtx_mas_4", "ndrugtx_menos_4")
  ))

We refit the model.

cox_model_4 <- coxph(
  tiempo_evento ~ age_cat2 + beck_cat2 + ivhx + ndrugtx_cat2 + 
    race + treat + fentanyl,
  data = data
)
summary(cox_model_4)
## Call:
## coxph(formula = tiempo_evento ~ age_cat2 + beck_cat2 + ivhx + 
##     ndrugtx_cat2 + race + treat + fentanyl, data = data)
## 
##   n= 628, number of events= 508 
## 
##                                 coef exp(coef) se(coef)      z   Pr(>|z|)    
## age_cat2age_mas_40          -0.56329   0.56934  0.14332 -3.930 0.00008481 ***
## beck_cat2Minimo_Leve        -0.19019   0.82680  0.09015 -2.110    0.03488 *  
## ivhxNunca                   -0.61668   0.53973  0.13430 -4.592 0.00000439 ***
## ivhxAlgunas veces           -0.12364   0.88370  0.11825 -1.046    0.29576    
## ndrugtx_cat2ndrugtx_menos_4 -0.29495   0.74457  0.09363 -3.150    0.00163 ** 
## raceNegra/Hispánica         -0.15186   0.85911  0.09555 -1.589    0.11196    
## treatSí                     -0.24008   0.78657  0.08978 -2.674    0.00749 ** 
## fentanylNo                  -0.27801   0.75729  0.10094 -2.754    0.00589 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                             exp(coef) exp(-coef) lower .95 upper .95
## age_cat2age_mas_40             0.5693      1.756    0.4299    0.7540
## beck_cat2Minimo_Leve           0.8268      1.209    0.6929    0.9866
## ivhxNunca                      0.5397      1.853    0.4148    0.7022
## ivhxAlgunas veces              0.8837      1.132    0.7009    1.1142
## ndrugtx_cat2ndrugtx_menos_4    0.7446      1.343    0.6197    0.8945
## raceNegra/Hispánica            0.8591      1.164    0.7124    1.0360
## treatSí                        0.7866      1.271    0.6596    0.9379
## fentanylNo                     0.7573      1.320    0.6214    0.9230
## 
## Concordance= 0.627  (se = 0.013 )
## Likelihood ratio test= 102  on 8 df,   p=<0.0000000000000002
## Wald test            = 95.8  on 8 df,   p=<0.0000000000000002
## Score (logrank) test = 99.6  on 8 df,   p=<0.0000000000000002

Since the hypothesis is about identifying subjects with a low chance of relapsing after treatment, we set the reference levels of some categories so that the contrasts are expressed in that direction. (Note that fentanyl and ivhx were already releveled in step 9, so the refitted model below is identical to cox_model_4 above.)

data <- data |>
  mutate(fentanyl = relevel(fentanyl, ref = "Sí"))

data <- data |>
  mutate(ivhx = relevel(ivhx, ref = "Siempre"))

cox_model_4 <- coxph(
  tiempo_evento ~ age_cat2 + beck_cat2 + ivhx + ndrugtx_cat2 + 
    race + treat + fentanyl,
  data = data
)
summary(cox_model_4)
## Call:
## coxph(formula = tiempo_evento ~ age_cat2 + beck_cat2 + ivhx + 
##     ndrugtx_cat2 + race + treat + fentanyl, data = data)
## 
##   n= 628, number of events= 508 
## 
##                                 coef exp(coef) se(coef)      z   Pr(>|z|)    
## age_cat2age_mas_40          -0.56329   0.56934  0.14332 -3.930 0.00008481 ***
## beck_cat2Minimo_Leve        -0.19019   0.82680  0.09015 -2.110    0.03488 *  
## ivhxNunca                   -0.61668   0.53973  0.13430 -4.592 0.00000439 ***
## ivhxAlgunas veces           -0.12364   0.88370  0.11825 -1.046    0.29576    
## ndrugtx_cat2ndrugtx_menos_4 -0.29495   0.74457  0.09363 -3.150    0.00163 ** 
## raceNegra/Hispánica         -0.15186   0.85911  0.09555 -1.589    0.11196    
## treatSí                     -0.24008   0.78657  0.08978 -2.674    0.00749 ** 
## fentanylNo                  -0.27801   0.75729  0.10094 -2.754    0.00589 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                             exp(coef) exp(-coef) lower .95 upper .95
## age_cat2age_mas_40             0.5693      1.756    0.4299    0.7540
## beck_cat2Minimo_Leve           0.8268      1.209    0.6929    0.9866
## ivhxNunca                      0.5397      1.853    0.4148    0.7022
## ivhxAlgunas veces              0.8837      1.132    0.7009    1.1142
## ndrugtx_cat2ndrugtx_menos_4    0.7446      1.343    0.6197    0.8945
## raceNegra/Hispánica            0.8591      1.164    0.7124    1.0360
## treatSí                        0.7866      1.271    0.6596    0.9379
## fentanylNo                     0.7573      1.320    0.6214    0.9230
## 
## Concordance= 0.627  (se = 0.013 )
## Likelihood ratio test= 102  on 8 df,   p=<0.0000000000000002
## Wald test            = 95.8  on 8 df,   p=<0.0000000000000002
## Score (logrank) test = 99.6  on 8 df,   p=<0.0000000000000002

Checking assumptions: cox_model_4

Schoenfeld proportional hazards test.

test_ph_cox_model_4 <- cox.zph(cox_model_4)
test_ph_cox_model_4
##               chisq df     p
## age_cat2      0.262  1 0.609
## beck_cat2     2.301  1 0.129
## ivhx          0.209  2 0.901
## ndrugtx_cat2  1.254  1 0.263
## race          2.555  1 0.110
## treat         2.805  1 0.094
## fentanyl      0.826  1 0.363
## GLOBAL       10.117  8 0.257

✅ Conclusion: no covariate shows evidence against proportional hazards (all p > 0.05) and the global test is not significant (p = 0.257), so the assumption looks reasonable for cox_model_4.

Schoenfeld residuals over time, plotted per variable.

ggcoxzph(test_ph_cox_model_4)

✅ Conclusion:

Log-log plots.

Log(-log) plot for the variable ivhx.

fit_ivhx <- survfit(Surv(time, relapse) ~ ivhx, data = data)

ggsurvplot(
  fit_ivhx,
  data        = data,                
  fun         = "cloglog",
  xlab        = "Tiempo (días)",
  ylab        = "log(-log(S(t)))",
  legend.title= "Uso EV",
  legend.labs = levels(data$ivhx),
  conf.int    = FALSE,
  palette     = "simpsons"
)

Log(-log) plot for the variable age_cat2.

fit_age <- survfit(Surv(time, relapse) ~ age_cat2, data = data)

ggsurvplot(
  fit_age,
  data         = data,
  fun          = "cloglog",
  xlab         = "Tiempo (días)",
  ylab         = "log(-log(S(t)))",
  legend.title = "Edad (cat)",
  legend.labs  = levels(data$age_cat2),
  conf.int     = FALSE,
  palette      = "simpsons"
)

Log(-log) plot for the variable beck_cat2.

fit_beck <- survfit(Surv(time, relapse) ~ beck_cat2, data = data)

ggsurvplot(
  fit_beck,
  data         = data,
  fun          = "cloglog",
  xlab         = "Tiempo (días)",
  ylab         = "log(-log(S(t)))",
  legend.title = "Beck (cat)",
  legend.labs  = levels(data$beck_cat2),
  conf.int     = FALSE,
  palette      = "simpsons"
)

Log(-log) plot for the variable ndrugtx_cat2.

fit_ndrug <- survfit(Surv(time, relapse) ~ ndrugtx_cat2, data = data)

ggsurvplot(
  fit_ndrug,
  data         = data,
  fun          = "cloglog",
  xlab         = "Tiempo (días)",
  ylab         = "log(-log(S(t)))",
  legend.title = "N° tratamientos (cat)",
  legend.labs  = levels(data$ndrugtx_cat2),
  conf.int     = FALSE,
  palette      = "simpsons"
)

Log(-log) plot for the variable `race`.

fit_race <- survfit(Surv(time, relapse) ~ race, data = data)

ggsurvplot(
  fit_race,
  data         = data,
  fun          = "cloglog",
  xlab         = "Tiempo (días)",
  ylab         = "log(-log(S(t)))",
  legend.title = "Raza",
  legend.labs  = levels(data$race),
  conf.int     = FALSE,
  palette      = "simpsons"
)

Log(-log) plot for the variable `treat`.

fit_treat <- survfit(Surv(time, relapse) ~ treat, data = data)

ggsurvplot(
  fit_treat,
  data         = data,
  fun          = "cloglog",
  xlab         = "Tiempo (días)",
  ylab         = "log(-log(S(t)))",
  legend.title = "Tratamiento",
  legend.labs  = levels(data$treat),
  conf.int     = FALSE,
  palette      = "simpsons"
)

Log(-log) plot for the variable `fentanyl`.

fit_fentanyl <- survfit(Surv(time, relapse) ~ fentanyl, data = data)

ggsurvplot(
  fit_fentanyl,
  data         = data,
  fun          = "cloglog",
  xlab         = "Tiempo (días)",
  ylab         = "log(-log(S(t)))",
  legend.title = "Fentanilo",
  legend.labs  = levels(data$fentanyl),
  conf.int     = FALSE,
  palette      = "simpsons"
)

Martingale and deviance residuals.

a.- Martingale.

ggcoxdiagnostics(cox_model_4, type = "martingale", linear.predictions = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

✅ Conclusion:

b.- Deviance.

ggcoxdiagnostics(cox_model_4, type = "deviance", linear.predictions = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

✅ There is no evidence of a global pattern of poor fit.

⚠️ There are, however, some observations with residuals beyond ±2 that could be examined individually.

Assessing the model's discrimination and calibration.

Discrimination.

Harrell's C-index.

summary(cox_model_4)$concordance
##          C      se(C) 
## 0.62679285 0.01298882

✅ Conclusion: C = 0.627 (SE 0.013), essentially the same modest discrimination as the model with the continuous predictors (C = 0.624).

Somers' D (Dxy) and the Q, D, U, slope and R2 metrics with the rms package.

dd <- datadist(data); options(datadist = "dd")
cox_metrics <- cph(tiempo_evento ~ age_cat2 + beck_cat2 + ivhx + ndrugtx_cat2 + race + treat + fentanyl, data = data, x = TRUE, y = TRUE, surv = TRUE)

We validate the metrics with the bootstrap.

invisible(capture.output({ # this wrapper suppresses the printed bootstrap output
  cox_rms <- validate(cox_metrics, method = "boot", B = 200)
}))

Discrimination and calibration metrics (bootstrap-validated).

We review the validated metrics (in the index.corrected column).

cox_rms
##       index.orig training   test optimism index.corrected   n
## Dxy       0.2536   0.2604 0.2461   0.0143          0.2393 200
## R2        0.1499   0.1588 0.1409   0.0180          0.1320 200
## Slope     1.0000   1.0000 0.9382   0.0618          0.9382 200
## D         0.0172   0.0183 0.0160   0.0023          0.0149 200
## U        -0.0003  -0.0003 0.0003  -0.0006          0.0003 200
## Q         0.0175   0.0187 0.0158   0.0029          0.0146 200
## g         0.5470   0.5641 0.5242   0.0400          0.5070 200

For easier reading we can print only the column of interest and round to 3 decimals.

round(cox_rms[,5],3)
##   Dxy    R2 Slope     D     U     Q     g 
## 0.239 0.132 0.938 0.015 0.000 0.015 0.507

🔎 Conclusion: the bias-corrected Dxy of 0.239 corresponds to a corrected C of about 0.62, and the shrinkage slope of 0.94 indicates only mild overfitting; validated performance is very similar to that of the previous model.

Let us plot the calibration curve.

Calibration. Graphical assessment.

invisible(capture.output({
cal <- calibrate(cox_metrics, method = "boot", B = 200, u = 180)
}))
plot(cal, 
     subtitles = FALSE,
     lwd = 2, 
     lty = 2, 
     main = "Gráfico de Calibración",
     cex.main = 1.2,
     cex.axis = 0.7,
     cex.lab = 0.9,
     xlab = "Probabilidad Predicha de Recaída a 180 días",
     ylab = "Recaída Observada")

Table for cox_model_4.

tab_model(cox_model_4, show.se = T, show.dev = T, show.loglik = T, show.intercept = F,show.stat = F, show.reflvl = F, digits = 2, digits.p = 3, show.r2 = F, collapse.ci = F)
Outcome: tiempo_evento (estimates are hazard ratios, exp(coef))

Predictors                      Estimates  std. Error  CI           p
age_cat2 [age_mas_40]           0.57       0.08        0.43 – 0.75  <0.001
beck_cat2 [Minimo_Leve]         0.83       0.07        0.69 – 0.99  0.035
ivhx [Nunca]                    0.54       0.07        0.41 – 0.70  <0.001
ivhx [Algunas veces]            0.88       0.10        0.70 – 1.11  0.296
ndrugtx_cat2 [ndrugtx_menos_4]  0.74       0.07        0.62 – 0.89  0.002
race [Negra/Hispánica]          0.86       0.08        0.71 – 1.04  0.112
treat [Sí]                      0.79       0.07        0.66 – 0.94  0.007
fentanyl [No]                   0.76       0.08        0.62 – 0.92  0.006
Observations                    628
log-Likelihood                  -2891.628

12. Create an event variable with 3 levels: censoring, event of interest, competing event.

In this dataset there is no TIMEDEATH or TIMERELAPSE variable; time is already the time to the first event (or to censoring). That is why the relapse/death flags are enough to code the event. Ideally no row should have both flags equal to 1.

Since this model involves two events, we create a variable that encodes both:

with(data, table(relapse, death, useNA = "ifany"))
##        death
## relapse   0   1
##       0  74  46
##       1 508   0
options(scipen = 999)
data <- data %>%
  mutate(
    evento = case_when(
      relapse == 1 & death == 0 ~ 1L,  # event of interest (relapse)
      relapse == 0 & death == 1 ~ 2L,  # competing event (death)
      relapse == 0 & death == 0 ~ 0L,  # censoring
      TRUE ~ NA_integer_              # in case anything unexpected appears
    ),
    evento = factor(evento,
                    levels = c(0L, 1L, 2L),
                    labels = c("Censura", "Recaída", "Muerte"))
  )
dplyr::count(data, evento)
##    evento   n
## 1 Censura  74
## 2 Recaída 508
## 3  Muerte  46
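
As a quick programmatic check of the point above (a sketch that simply restates the cross-table):

stopifnot(sum(data$relapse == 1 & data$death == 1) == 0) # no row should have both flags set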

We convert days to months and years, keeping the time to the corresponding event.

data <- data %>%
  dplyr::mutate(
    time_days = time,
    time_yr   = time / 365.25,
    time_mo   = time / 30.4375
  )

13. Fit Cox models for the event of interest and the competing event.

This chunk fits a cause-specific Cox model for event 1 (relapse):

# 'evento' is stored as a factor; we create a numeric 0/1/2 version for modelling (0 = censoring, 1 = relapse, 2 = death).

data <- data |> 
  mutate(
    status = case_when(
      relapse == 1              ~ 1L,  # evento de interés
      death   == 1 & relapse==0 ~ 2L,  # evento competitivo
      TRUE                      ~ 0L   # censura
    )
  )
  
# Cause-specific Cox: relapse (status == 1).
cox_CSC_recaida <- coxph( 
  Surv(time_days, status == 1) ~
    age_cat2 + beck_cat2 + ivhx + ndrugtx_cat2 + race + treat + fentanyl,
  data = data
)
summary(cox_CSC_recaida)
## Call:
## coxph(formula = Surv(time_days, status == 1) ~ age_cat2 + beck_cat2 + 
##     ivhx + ndrugtx_cat2 + race + treat + fentanyl, data = data)
## 
##   n= 628, number of events= 508 
## 
##                                 coef exp(coef) se(coef)      z   Pr(>|z|)    
## age_cat2age_mas_40          -0.56329   0.56934  0.14332 -3.930 0.00008481 ***
## beck_cat2Minimo_Leve        -0.19019   0.82680  0.09015 -2.110    0.03488 *  
## ivhxNunca                   -0.61668   0.53973  0.13430 -4.592 0.00000439 ***
## ivhxAlgunas veces           -0.12364   0.88370  0.11825 -1.046    0.29576    
## ndrugtx_cat2ndrugtx_menos_4 -0.29495   0.74457  0.09363 -3.150    0.00163 ** 
## raceNegra/Hispánica         -0.15186   0.85911  0.09555 -1.589    0.11196    
## treatSí                     -0.24008   0.78657  0.08978 -2.674    0.00749 ** 
## fentanylNo                  -0.27801   0.75729  0.10094 -2.754    0.00589 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                             exp(coef) exp(-coef) lower .95 upper .95
## age_cat2age_mas_40             0.5693      1.756    0.4299    0.7540
## beck_cat2Minimo_Leve           0.8268      1.209    0.6929    0.9866
## ivhxNunca                      0.5397      1.853    0.4148    0.7022
## ivhxAlgunas veces              0.8837      1.132    0.7009    1.1142
## ndrugtx_cat2ndrugtx_menos_4    0.7446      1.343    0.6197    0.8945
## raceNegra/Hispánica            0.8591      1.164    0.7124    1.0360
## treatSí                        0.7866      1.271    0.6596    0.9379
## fentanylNo                     0.7573      1.320    0.6214    0.9230
## 
## Concordance= 0.627  (se = 0.013 )
## Likelihood ratio test= 102  on 8 df,   p=<0.0000000000000002
## Wald test            = 95.8  on 8 df,   p=<0.0000000000000002
## Score (logrank) test = 99.6  on 8 df,   p=<0.0000000000000002

Relapse (status == 1).

n = 628, events = 508, concordance ≈ 0.63 (moderate discrimination).

Interpretation: these are effects on the hazard of relapsing on each day of follow-up, among subjects still alive and relapse-free.

Overall picture: older age, less severe depression (Beck), never injecting, fewer previous drug treatments, receiving treatment and not using fentanyl are all associated with a lower instantaneous risk of relapse.

Cause-specific Cox: death (status == 2).

cox_CSC_muerte <- coxph( 
  Surv(time_days, status == 2) ~
    age_cat2 + beck_cat2 + ivhx + ndrugtx_cat2 + race + treat + fentanyl,
  data = data
)
summary(cox_CSC_muerte)
## Call:
## coxph(formula = Surv(time_days, status == 2) ~ age_cat2 + beck_cat2 + 
##     ivhx + ndrugtx_cat2 + race + treat + fentanyl, data = data)
## 
##   n= 628, number of events= 46 
## 
##                                 coef exp(coef) se(coef)      z  Pr(>|z|)    
## age_cat2age_mas_40           0.16097   1.17465  0.40637  0.396    0.6920    
## beck_cat2Minimo_Leve         0.24529   1.27799  0.33856  0.724    0.4688    
## ivhxNunca                    1.55003   4.71160  0.57879  2.678    0.0074 ** 
## ivhxAlgunas veces            0.59307   1.80953  0.48982  1.211    0.2260    
## ndrugtx_cat2ndrugtx_menos_4 -0.76331   0.46612  0.36373 -2.099    0.0359 *  
## raceNegra/Hispánica         -0.03424   0.96634  0.41676 -0.082    0.9345    
## treatSí                      0.05348   1.05493  0.30878  0.173    0.8625    
## fentanylNo                  -2.15792   0.11556  0.53337 -4.046 0.0000521 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                             exp(coef) exp(-coef) lower .95 upper .95
## age_cat2age_mas_40             1.1747     0.8513   0.52967    2.6050
## beck_cat2Minimo_Leve           1.2780     0.7825   0.65818    2.4815
## ivhxNunca                      4.7116     0.2122   1.51532   14.6498
## ivhxAlgunas veces              1.8095     0.5526   0.69283    4.7261
## ndrugtx_cat2ndrugtx_menos_4    0.4661     2.1454   0.22850    0.9508
## raceNegra/Hispánica            0.9663     1.0348   0.42695    2.1872
## treatSí                        1.0549     0.9479   0.57596    1.9322
## fentanylNo                     0.1156     8.6532   0.04063    0.3287
## 
## Concordance= 0.759  (se = 0.044 )
## Likelihood ratio test= 25.16  on 8 df,   p=0.001
## Wald test            = 24.93  on 8 df,   p=0.002
## Score (logrank) test = 28.18  on 8 df,   p=0.0004

Death (status == 2).

n = 628, events = 46, concordance ≈ 0.76 (good, although with so few events the confidence intervals are wide).

Interpretation: these are effects on the hazard of dying on each day of follow-up, among subjects who have not relapsed.

Overall picture: for death, the strongest signals are not using fentanyl (strongly protective) and having fewer previous treatments (protective). The ivhx "Nunca" contrast shows a higher risk, but with only 46 deaths the precision is limited and this should be interpreted with caution.

Important reminders.

These are cause-specific hazards: the "other" event is treated as censored. They are useful for understanding mechanisms and for simulating processes, but they do not directly answer "what is the cumulative probability of ...?" (for that we turn to the CIF and the Fine-Gray model).
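
As a complement (not part of the original analysis), a Fine-Gray model for the subdistribution hazard of relapse could be fitted with tidycmprsk::crr(), which is already loaded; a hedged sketch, assuming crr()'s default of modelling the first event level after the censoring level ("1" = relapse here):

fg_recaida <- tidycmprsk::crr(
  Surv(time_mo, factor(status)) ~ age_cat2 + beck_cat2 + ivhx + ndrugtx_cat2 +
    race + treat + fentanyl,
  data = data
)
fg_recaida # subdistribution hazard ratios for relapse, with death as the competing event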

14. Analysis of the cumulative incidence function (CIF) by a variable of interest: treat.

CIF stratified by treatment.

cif_treat <- cuminc(Surv(time_mo, factor(status)) ~ treat, data = data)
cif_treat
## 
## ── cuminc() ────────────────────────────────────────────────────────────────────
## • Failure type "1"
## strata          time   n.risk   estimate   std.error   95% CI          
## No o aislados   5.00   138      0.558      0.028       0.502, 0.611    
## No o aislados   10.0   70       0.774      0.024       0.724, 0.817    
## No o aislados   15.0   56       0.819      0.022       0.771, 0.857    
## No o aislados   20.0   8        0.846      0.021       0.798, 0.882    
## Sí              5.00   184      0.386      0.028       0.332, 0.441    
## Sí              10.0   99       0.664      0.027       0.608, 0.715    
## Sí              15.0   68       0.767      0.024       0.715, 0.811    
## Sí              20.0   9        0.795      0.023       0.745, 0.837
## • Failure type "2"
## strata          time   n.risk   estimate   std.error   95% CI          
## No o aislados   5.00   138      0.003      0.003       0.000, 0.016    
## No o aislados   10.0   70       0.003      0.003       0.000, 0.016    
## No o aislados   15.0   56       0.003      0.003       0.000, 0.016    
## No o aislados   20.0   8        0.087      0.020       0.054, 0.131    
## Sí              5.00   184      0.007      0.005       0.001, 0.022    
## Sí              10.0   99       0.007      0.005       0.001, 0.022    
## Sí              15.0   68       0.007      0.005       0.001, 0.022    
## Sí              20.0   9        0.097      0.020       0.062, 0.140
## • Tests
## outcome   statistic   df     p.value    
## 1         8.23        1.00   0.004      
## 2         0.619       1.00   0.43

Relapse (type 1).

The estimate column is the cumulative incidence (CIF) at each time point.

At 20 months, approximately 84.6% of the "No o aislados" group and 79.5% of the "Sí" group have relapsed.

Interpretation: those without prior treatment (or who were isolated) accumulate a higher risk of relapse than those who did have prior treatment. The absolute difference is about 5 percentage points at 20 months.

Death (type 2).

Absolute risks are low over most of the window (under 1% up to 15 months) and reach roughly 9–10% by 20 months, with considerable uncertainty in the estimates.

Gray's test.

The Tests block at the bottom gives one p-value per outcome: p = 0.004 for relapse (the CIF curves differ by treatment group) and p = 0.43 for death (no evidence of a difference).

Practical conclusion: prior treatment is associated with a lower cumulative incidence of relapse, while the competing risk of death behaves similarly in both groups. The estimates at a fixed horizon can also be extracted directly, as in the sketch below.
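
A small sketch for pulling the 20-month CIF estimates out of the cif_treat object instead of reading them off the printout; it assumes tidycmprsk's tidy() method and its times argument (exact column names may vary by version, hence the tolerant any_of() selection):

# CIF estimates with 95% CIs at 20 months, one row per stratum and outcome
cif_20 <- tidy(cif_treat, times = 20)
cif_20 |>
  dplyr::select(dplyr::any_of(c("outcome", "strata", "time",
                                "estimate", "conf.low", "conf.high")))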

We now build a plot analogous to the Kaplan-Meier curve, but plotting the \(CIF\) instead of \(S(t)\).

p <- ggcuminc(cif_treat, outcome = c(1, 2), linetype_aes = TRUE) +
  scale_color_manual(
    name   = "Tratamiento previo",
    # names must match the strata labels of the factor treat
    values = c("No o aislados" = "#56B4E9",   # light blue
               "Sí"            = "#E69F00"),  # orange
    labels = c("No o aislados" = "No/aislados", "Sí" = "Sí")
  ) +
  scale_linetype_manual(
    name   = "Tipo de evento",
    values = c("solid", "dashed"),
    labels = c("Recaída (evento)", "Competitivo (muerte)")
  ) +
  labs(
    x = "Meses", y = "Incidencia acumulada (CIF)",
    title = "CIF de recaída y muerte por tratamiento previo"
  ) +
  # note: add_risktable() does not accept a 'tables_theme' argument;
  # the default risk-table styling is used
  add_risktable() +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

p

Relapse (event 1): the pattern is clear: the CIF of the "No o aislados" group rises above that of the "Sí" group early on and stays higher throughout follow-up.

Competing event (death): both curves stay close to zero over most of the window and only rise near the end of follow-up, with no visible separation between groups.

Consistency with the models: this agrees with the cause-specific Cox results (lower relapse hazard with prior treatment, no clear treatment effect on death) and with Gray's test (p = 0.004 for relapse, p = 0.43 for death).

Practical conclusion: in absolute terms, prior treatment delays and reduces the accumulation of relapses, while the competing risk of death is essentially the same in both groups.

14. Competing-risks models.

Fit the model with CSC (Cause-Specific Cox).

data <- data |>
  dplyr::mutate(
    status = factor(status, levels = c(0, 1, 2)),           # 0 = censored, 1 = relapse, 2 = death
    treat  = stats::relevel(treat, ref = "No o aislados")    # reference level: "No o aislados"
  )

modelo_csc <- riskRegression::CSC(
  Hist(time_mo, status) ~ age_cat2 + beck_cat2 + ivhx + ndrugtx_cat2 + race + treat + fentanyl,
  data  = data,
  cause = 1
)
## Warning in FUN(X[[i]], ...): Variables named time in data will be ignored.
## Warning in FUN(X[[i]], ...): Variables named time in data will be ignored.
modelo_csc
## riskRegression::CSC(formula = Hist(time_mo, status) ~ age_cat2 + 
##     beck_cat2 + ivhx + ndrugtx_cat2 + race + treat + fentanyl, 
##     data = data, cause = 1)
## 
## Right-censored response of a competing.risks model
## 
## No.Observations: 628 
## 
## Pattern:
##          
## Cause     event right.censored
##   1         508              0
##   2          46              0
##   unknown     0             74
## 
## 
## ----------> Cause:  1 
## 
## Call:
## coxph(formula = survival::Surv(time, status) ~ age_cat2 + beck_cat2 + 
##     ivhx + ndrugtx_cat2 + race + treat + fentanyl, x = TRUE, 
##     y = TRUE)
## 
##   n= 628, number of events= 508 
## 
##                                 coef exp(coef) se(coef)      z   Pr(>|z|)    
## age_cat2age_mas_40          -0.56329   0.56934  0.14332 -3.930 0.00008481 ***
## beck_cat2Minimo_Leve        -0.19019   0.82680  0.09015 -2.110    0.03488 *  
## ivhxNunca                   -0.61668   0.53973  0.13430 -4.592 0.00000439 ***
## ivhxAlgunas veces           -0.12364   0.88370  0.11825 -1.046    0.29576    
## ndrugtx_cat2ndrugtx_menos_4 -0.29495   0.74457  0.09363 -3.150    0.00163 ** 
## raceNegra/Hispánica         -0.15186   0.85911  0.09555 -1.589    0.11196    
## treatSí                     -0.24008   0.78657  0.08978 -2.674    0.00749 ** 
## fentanylNo                  -0.27801   0.75729  0.10094 -2.754    0.00589 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                             exp(coef) exp(-coef) lower .95 upper .95
## age_cat2age_mas_40             0.5693      1.756    0.4299    0.7540
## beck_cat2Minimo_Leve           0.8268      1.209    0.6929    0.9866
## ivhxNunca                      0.5397      1.853    0.4148    0.7022
## ivhxAlgunas veces              0.8837      1.132    0.7009    1.1142
## ndrugtx_cat2ndrugtx_menos_4    0.7446      1.343    0.6197    0.8945
## raceNegra/Hispánica            0.8591      1.164    0.7124    1.0360
## treatSí                        0.7866      1.271    0.6596    0.9379
## fentanylNo                     0.7573      1.320    0.6214    0.9230
## 
## Concordance= 0.627  (se = 0.013 )
## Likelihood ratio test= 102  on 8 df,   p=<0.0000000000000002
## Wald test            = 95.8  on 8 df,   p=<0.0000000000000002
## Score (logrank) test = 99.6  on 8 df,   p=<0.0000000000000002
## 
## 
## 
## ----------> Cause:  2 
## 
## Call:
## coxph(formula = survival::Surv(time, status) ~ age_cat2 + beck_cat2 + 
##     ivhx + ndrugtx_cat2 + race + treat + fentanyl, x = TRUE, 
##     y = TRUE)
## 
##   n= 628, number of events= 46 
## 
##                                 coef exp(coef) se(coef)      z  Pr(>|z|)    
## age_cat2age_mas_40           0.16097   1.17465  0.40637  0.396    0.6920    
## beck_cat2Minimo_Leve         0.24529   1.27799  0.33856  0.724    0.4688    
## ivhxNunca                    1.55003   4.71160  0.57879  2.678    0.0074 ** 
## ivhxAlgunas veces            0.59307   1.80953  0.48982  1.211    0.2260    
## ndrugtx_cat2ndrugtx_menos_4 -0.76331   0.46612  0.36373 -2.099    0.0359 *  
## raceNegra/Hispánica         -0.03424   0.96634  0.41676 -0.082    0.9345    
## treatSí                      0.05348   1.05493  0.30878  0.173    0.8625    
## fentanylNo                  -2.15792   0.11556  0.53337 -4.046 0.0000521 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                             exp(coef) exp(-coef) lower .95 upper .95
## age_cat2age_mas_40             1.1747     0.8513   0.52967    2.6050
## beck_cat2Minimo_Leve           1.2780     0.7825   0.65818    2.4815
## ivhxNunca                      4.7116     0.2122   1.51532   14.6498
## ivhxAlgunas veces              1.8095     0.5526   0.69283    4.7261
## ndrugtx_cat2ndrugtx_menos_4    0.4661     2.1454   0.22850    0.9508
## raceNegra/Hispánica            0.9663     1.0348   0.42695    2.1872
## treatSí                        1.0549     0.9479   0.57596    1.9322
## fentanylNo                     0.1156     8.6532   0.04063    0.3287
## 
## Concordance= 0.759  (se = 0.044 )
## Likelihood ratio test= 25.16  on 8 df,   p=0.001
## Wald test            = 24.93  on 8 df,   p=0.002
## Score (logrank) test = 28.18  on 8 df,   p=0.0004

Cause 1 – Relapse (events = 508).

Effects on the cause-specific hazard of relapse (reference for treat = "No o aislados"): age over 40 (HR 0.57), minimal/mild Beck score (0.83), never injecting drugs (ivhx "Nunca", 0.54), fewer than 4 prior treatments (0.74), prior treatment (0.79) and no fentanyl use (0.76) are all associated with a significantly lower relapse hazard; the ivhx "Algunas veces" and race contrasts are not statistically significant.

Cause 2 – Death (competing event; events = 46).

Effects on the cause-specific hazard of death: ivhx "Nunca" is associated with a higher hazard (HR ≈ 4.7, with a wide CI), fewer than 4 prior treatments with a lower hazard (≈ 0.47), and no fentanyl use with a much lower hazard (≈ 0.12); the remaining contrasts are not significant.

How to read the two fits together: the same covariate can act differently on each cause, so a variable that lowers the relapse hazard need not lower the death hazard, and with only 46 deaths the cause-2 estimates are imprecise. The hazard ratios can also be extracted programmatically, as in the sketch below.
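
A small sketch for pulling the hazard ratios (and a proportional-hazards check) out of the fitted object; it assumes the CSC fit keeps its two underlying coxph models in modelo_csc$models, in cause order:

# Hazard ratios with 95% CIs for each cause, from the coxph fits stored inside
# the CSC object (assumed order: [[1]] = cause 1 / relapse, [[2]] = cause 2 / death)
hr_recaida <- exp(cbind(HR = coef(modelo_csc$models[[1]]),
                        confint(modelo_csc$models[[1]])))
hr_muerte  <- exp(cbind(HR = coef(modelo_csc$models[[2]]),
                        confint(modelo_csc$models[[2]])))
round(hr_recaida, 3)
round(hr_muerte, 3)

# Schoenfeld-residual test of proportional hazards for the relapse model
cox.zph(modelo_csc$models[[1]])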

Evaluate predictive performance with Score().

score_csc <- Score(
  object    = list("CSC model" = modelo_csc),
  formula   = Hist(time_mo, status) ~ 1,
  data      = data,
  times     = c(6, 12, 18, 24),              # horizons in months (24 exceeds follow-up and is dropped)
  metrics   = c("auc", "brier"),
  summary   = "ipa",
  cause     = 1,
  cens.model= "km"
)
## Upper limit of followup is 23.6878850102669
## Results at times higher than 23.6878850102669 are not computed.
score_csc$AUC$score    # AUC(t) with 95% CI
##        model times       AUC         se     lower     upper
##       <fctr> <num>     <num>      <num>     <num>     <num>
## 1: CSC model     6 0.6568268 0.02181559 0.6140691 0.6995846
## 2: CSC model    12 0.7239139 0.02343066 0.6779906 0.7698371
## 3: CSC model    18 0.7781091 0.02381739 0.7314279 0.8247904
score_csc$Brier$score  # Brier and IPA
##         model times     Brier          se     lower     upper        IPA
##        <fctr> <num>     <num>       <num>     <num>     <num>      <num>
## 1: Null model     6 0.2481173 0.001737026 0.2447128 0.2515218 0.00000000
## 2: Null model    12 0.1826770 0.008933676 0.1651673 0.2001866 0.00000000
## 3: Null model    18 0.1508811 0.009862087 0.1315517 0.1702104 0.00000000
## 4:  CSC model     6 0.2306704 0.006114098 0.2186870 0.2426538 0.07031715
## 5:  CSC model    12 0.1613630 0.008372348 0.1449535 0.1777725 0.11667555
## 6:  CSC model    18 0.1279340 0.008316170 0.1116346 0.1442334 0.15208726

AUC(t).

✅ Conclusion: discrimination improves over time (AUC ≈ 0.66 at 6 months, 0.72 at 12 and 0.78 at 18), consistent with what we saw in the CIF/KM curves. The 24-month horizon is not computed because follow-up ends at about 23.7 months.

Brier and IPA (vs. the "Null model").

✅ Conclusion: from 12 months on the model offers a moderate improvement over the null model (IPA ≈ 12%), and at 18 months a bit more (≈ 15%); at 6 months the gain is small (≈ 7%).

In short: the CSC model discriminates and predicts better at medium and long horizons than at 6 months. The IPA column can be verified directly from the Brier scores, as in the check below.
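
A quick check of the IPA column using its definition, IPA = 1 - Brier(model) / Brier(null), applied to the table above:

# Recompute IPA from the Brier scores as a sanity check
brier_tab  <- as.data.frame(score_csc$Brier$score)
brier_null <- brier_tab$Brier[brier_tab$model == "Null model"]
brier_csc  <- brier_tab$Brier[brier_tab$model == "CSC model"]
round(1 - brier_csc / brier_null, 3)   # ≈ 0.070, 0.117, 0.152 at 6, 12, 18 months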

Fit a Fine & Gray model for relapse.

modelo_fg <- FGR(
  formula = Hist(time_mo, status) ~ age_cat2 + beck_cat2 + ivhx +
                                   ndrugtx_cat2 + race + treat + fentanyl,
  data    = data,
  cause   = 1   # 1 = relapse (the event of interest)
)
summary(modelo_fg)   # SHR (subdistribution hazard ratios)
## Competing Risks Regression
## 
## Call:
## FGR(formula = Hist(time_mo, status) ~ age_cat2 + beck_cat2 + 
##     ivhx + ndrugtx_cat2 + race + treat + fentanyl, data = data, 
##     cause = 1)
## 
##                               coef exp(coef) se(coef)     z   p-value
## age_cat2age_mas_40          -0.563     0.570   0.1376 -4.09 0.0000430
## beck_cat2Minimo_Leve        -0.182     0.833   0.0891 -2.05 0.0410000
## ivhxNunca                   -0.650     0.522   0.1372 -4.74 0.0000021
## ivhxAlgunas veces           -0.164     0.849   0.1195 -1.37 0.1700000
## ndrugtx_cat2ndrugtx_menos_4 -0.265     0.768   0.0931 -2.84 0.0045000
## raceNegra/Hispánica         -0.159     0.853   0.0907 -1.76 0.0790000
## treatSí                     -0.231     0.794   0.0884 -2.61 0.0090000
## fentanylNo                  -0.251     0.778   0.1005 -2.49 0.0130000
## 
##                             exp(coef) exp(-coef)  2.5% 97.5%
## age_cat2age_mas_40              0.570       1.76 0.435 0.746
## beck_cat2Minimo_Leve            0.833       1.20 0.700 0.992
## ivhxNunca                       0.522       1.92 0.399 0.683
## ivhxAlgunas veces               0.849       1.18 0.672 1.073
## ndrugtx_cat2ndrugtx_menos_4     0.768       1.30 0.640 0.921
## raceNegra/Hispánica             0.853       1.17 0.714 1.019
## treatSí                         0.794       1.26 0.668 0.944
## fentanylNo                      0.778       1.28 0.639 0.948
## 
## Num. cases = 628
## Pseudo Log-likelihood = -2900 
## Pseudo likelihood ratio test = 101  on 8 df,

This output is the Fine & Gray (FGR) fit with relapse as cause 1.

What it reports are subdistribution hazard ratios (SHR): they measure the effect of each covariate on the cumulative incidence of relapse (CIF) while accounting for the fact that death competes. Reading rule: SHR < 1 ⇒ lower CIF (protective); SHR > 1 ⇒ higher CIF (higher risk), holding the rest constant. For example, treat "Sí" has SHR ≈ 0.79, i.e. roughly a 21% lower subdistribution hazard of relapse.

What the estimates show (rounded): all SHRs are below 1; age over 40 (0.57), ivhx "Nunca" (0.52), fewer than 4 prior treatments (0.77), prior treatment (0.79), no fentanyl use (0.78) and minimal/mild Beck score (0.83) are associated with a lower cumulative incidence of relapse, while ivhx "Algunas veces" and race are not statistically significant.

How this fits with the previous results (CSC vs. FGR): the SHRs are very close to the cause-specific HRs for relapse, which is expected when the competing event (death) is rare; both models tell the same substantive story. A comparison of their predicted risks is sketched below.
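
A minimal sketch comparing the absolute 12-month risk of relapse predicted by both models for the same profiles, using riskRegression's predictRisk() interface (the first five rows of the data are used purely as an illustration):

# Predicted 12-month risk of relapse (cause 1) from both models, same subjects
perfiles <- head(data, 5)
comp_12m <- data.frame(
  CSC = as.numeric(riskRegression::predictRisk(modelo_csc, newdata = perfiles,
                                               times = 12, cause = 1)),
  FG  = as.numeric(riskRegression::predictRisk(modelo_fg,  newdata = perfiles,
                                               times = 12, cause = 1))
)
round(comp_12m, 3)   # the two columns should be very similar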

Evaluate predictive performance with Score().

# AUC(t) and Brier/IPA at 6, 12, 18 and 24 months
score_fg <- Score(
  object  = list("FG model" = modelo_fg),
  formula = Hist(time_mo, status) ~ 1,
  data    = data,
  times   = c(6, 12, 18, 24),
  metrics = c("auc","brier"),
  summary = "ipa",
  cause   = 1,
  cens.model = "km"
)
## Upper limit of followup is 23.6878850102669
## Results at times higher than 23.6878850102669 are not computed.
# Clean tables
score_fg$AUC$score[, c("times","AUC","se","lower","upper")]
##    times       AUC         se     lower     upper
##    <num>     <num>      <num>     <num>     <num>
## 1:     6 0.6569608 0.02181983 0.6141947 0.6997268
## 2:    12 0.7243855 0.02344140 0.6784411 0.7703298
## 3:    18 0.7796314 0.02363633 0.7333050 0.8259577
score_fg$Brier$score[, c("model","times","Brier","se","lower","upper","IPA")]
##         model times     Brier          se     lower     upper        IPA
##        <fctr> <num>     <num>       <num>     <num>     <num>      <num>
## 1: Null model     6 0.2481173 0.001737026 0.2447128 0.2515218 0.00000000
## 2: Null model    12 0.1826770 0.008933676 0.1651673 0.2001866 0.00000000
## 3: Null model    18 0.1508811 0.009862087 0.1315517 0.1702104 0.00000000
## 4:   FG model     6 0.2305472 0.006058507 0.2186728 0.2424217 0.07081356
## 5:   FG model    12 0.1615044 0.008356300 0.1451264 0.1778824 0.11590163
## 6:   FG model    18 0.1279367 0.008286530 0.1116954 0.1441780 0.15206936

Compare FGR vs. CSC (discrimination).

score_comp <- Score(
  object  = list("FG" = modelo_fg, "CSC" = modelo_csc),
  formula = Hist(time_mo, status) ~ 1,
  data    = data,
  times   = c(6, 12, 18, 24),
  metrics = "auc",
  cause   = 1,
  cens.model = "km"
)
## Upper limit of followup is 23.6878850102669
## Results at times higher than 23.6878850102669 are not computed.
# AUC(t) for both models
score_comp$AUC$score[, c("model","times","AUC","lower","upper")]
##     model times       AUC     lower     upper
##    <fctr> <num>     <num>     <num>     <num>
## 1:     FG     6 0.6569608 0.6141947 0.6997268
## 2:     FG    12 0.7243855 0.6784411 0.7703298
## 3:     FG    18 0.7796314 0.7333050 0.8259577
## 4:    CSC     6 0.6568268 0.6140691 0.6995846
## 5:    CSC    12 0.7239139 0.6779906 0.7698371
## 6:    CSC    18 0.7781091 0.7314279 0.8247904
# AUC differences and p-values (test at each time point)
score_comp$AUC$contrasts
##    times  model reference     delta.AUC           se        lower        upper
##    <num> <fctr>    <fctr>         <num>        <num>        <num>        <num>
## 1:     6    CSC        FG -0.0001339398 0.0009094530 -0.001916435 0.0016485552
## 2:    12    CSC        FG -0.0004715812 0.0009811247 -0.002394550 0.0014513879
## 3:    18    CSC        FG -0.0015222391 0.0010230232 -0.003527328 0.0004828495
##            p
##        <num>
## 1: 0.8829148
## 2: 0.6307627
## 3: 0.1367559

Fine–Gray (FG, cause 1 = relapse): its AUC(t) is essentially the same as the CSC model's (≈ 0.66, 0.72 and 0.78 at 6, 12 and 18 months), and so are its Brier/IPA values.

Comparison of FG vs. Cause-Specific Cox (CSC): the AUC differences are tiny (|ΔAUC| < 0.002) and none are statistically significant (p = 0.88, 0.63 and 0.14), so neither model discriminates better than the other.

Let's make it visual: we extract the IPA by time and model and plot it.

score_comp <- Score(
  list("FG model" = modelo_fg, "CSC model" = modelo_csc),
  formula     = Hist(time_mo, status) ~ 1,   
  data        = data,
  times       = c(6, 12, 18),                 # the horizons used so far
  metrics     = c("auc", "brier"),
  summary     = "IPA",
  cause       = 1,
  cens.model  = "km"
)

tmp <- as.data.frame(score_comp$Brier$score)
ipa_col <- grep("^IPA", names(tmp), value = TRUE)   # matches "IPA" or "IPA.*"
ipa <- tmp[, c("model", "times", ipa_col)]
names(ipa)[3] <- "IPA"

ggplot(ipa, aes(x = times, y = IPA, color = model)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  geom_text(aes(label = percent(IPA, accuracy = 0.1)),
            vjust = -0.8, size = 3, show.legend = FALSE) +
  scale_y_continuous(labels = percent_format(accuracy = 0.1), limits = c(0, NA)) +
  scale_color_manual(values = c("Null model"="gray40","FG model"="#2ECC71","CSC model"="#3498DB"),
                     drop = FALSE) +
  labs(title = "Índice de Precisión de Predicción (IPA) en el tiempo",
       x = "Meses", y = "IPA", color = "Modelo") +
  theme_minimal() +
  theme(legend.position = "bottom", plot.title.position = "plot")

The IPA plot tells a simple story: both models gain predictive accuracy over time, from about 7% at 6 months to roughly 12% at 12 months and 15% at 18 months, and the FG and CSC curves are practically superimposed.

Quick reading: neither model is preferable on predictive accuracy, so the choice between them should rest on the question being asked (cause-specific mechanism vs. absolute cumulative risk), not on performance.

We extract the AUC by time and model and plot it.

auc_df <- score_comp$AUC$score |>
  as.data.frame() |>
  dplyr::select(model, times, AUC) |>
  dplyr::mutate(
    model = dplyr::recode(
      model,
      "FG model"  = "FG (Fine–Gray)",
      "CSC model" = "CSC (Cox causa-específica)",
      "Null model"= "Modelo nulo"
    ),
    times = as.numeric(times)
  )

# Automatic Y-axis limits with a small margin
ymin <- floor((min(auc_df$AUC) - 0.02) * 100) / 100
ymax <- ceiling((max(auc_df$AUC) + 0.02) * 100) / 100

ggplot(auc_df, aes(x = times, y = AUC, color = model)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  geom_hline(yintercept = 0.5, linetype = "dashed", alpha = .5) +  # chance level (may fall outside the y-limits; see the warning below)
  geom_hline(yintercept = 0.7, linetype = "dotted", alpha = .7) +  # adequate discrimination
  scale_color_manual(values = c(
    "Modelo nulo"                 = "grey40",
    "FG (Fine–Gray)"              = "#2ECC71",
    "CSC (Cox causa-específica)"  = "#3498DB"
  ), drop = FALSE) +
  scale_y_continuous(labels = scales::number_format(accuracy = 0.001),
                     limits = c(ymin, ymax),
                     breaks = seq(0.5, 1, 0.05)) +
  scale_x_continuous(breaks = sort(unique(auc_df$times))) +
  labs(
    title    = "Discriminación de modelos en el tiempo",
    subtitle = "Línea discontinua = azar (0,5) · Línea punteada = adecuada (0,7)",
    x = "Meses", y = "AUC (Área bajo la curva ROC)",
    color = "Modelo"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_hline()`).

Quick reading of the plot: both curves rise above the 0.7 reference ("adequate") by 12 months and approach 0.78 at 18 months, always well above chance (0.5); the FG and CSC lines are virtually indistinguishable.

Now for calibration.

Global calibration.

s <- Score(
  object  = list(FG = modelo_fg, CSC = modelo_csc),
  formula = Hist(time_mo, status) ~ 1,   # <- same variable names as in the fitted models
  data    = data,
  plots   = "calibration",
  summary = "IPA",
  cause   = 1
)
plotCalibration(s, models = c("FG","CSC"))

Quick summary: plotCalibration() compares the predicted risks with the observed event frequencies; a curve close to the diagonal indicates good calibration.

Practical conclusion: since the FG and CSC predictions are nearly identical, their calibration curves are expected to be almost superimposed; the relevant question is whether either model systematically over- or under-predicts the risk of relapse.

Time-specific calibration.

t <- Score(
  object  = list(FG = modelo_fg, CSC = modelo_csc),
  formula = Hist(time_mo, status) ~ 1,   
  data    = data,                        
  plots   = "calibration",
  summary = "IPA",
  times   = c(6, 12, 18),                # horizons in months
  cause   = 1
)
plotCalibration(t, models = c("FG","CSC"), times = 6)   # 6 months

plotCalibration(t, models = c("FG","CSC"), times = 12)  # 12 months

plotCalibration(t, models = c("FG","CSC"), times = 18)  # 18 months

What they show: one calibration curve per model at each horizon (6, 12 and 18 months), comparing the predicted risk of relapse with the observed cumulative incidence.

Prediction error (Brier(t)): as in the Score() tables, both models have a lower Brier score than the null model at every horizon, with the gap widening over time.

Calibration: look for systematic departures from the diagonal at each horizon; given how close the FG and CSC predictions are, any miscalibration is expected to affect both models similarly.

Going further: all of the metrics above are computed on the same data used to fit the models, so they are somewhat optimistic; an internal-validation sketch follows.
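
A minimal internal-validation sketch using bootstrap cross-validation, assuming Score()'s split.method = "bootcv" resampling option (B is kept small here only to limit run time):

set.seed(2025)
score_cv <- Score(
  object       = list("FG model" = modelo_fg, "CSC model" = modelo_csc),
  formula      = Hist(time_mo, status) ~ 1,
  data         = data,
  times        = c(6, 12, 18),
  metrics      = c("auc", "brier"),
  summary      = "ipa",
  cause        = 1,
  cens.model   = "km",
  split.method = "bootcv",   # refit on bootstrap samples, evaluate on out-of-bag data
  B            = 50          # number of bootstrap resamples
)
score_cv$AUC$score     # cross-validated AUC(t)
score_cv$Brier$score   # cross-validated Brier and IPA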