library(survival)
library(ggplot2)
library(cmprsk)
library(openxlsx)
library(kableExtra)
library(readxl)
library(survminer)
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(haven)
library(reshape2)
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(lubridate)
library(tidycmprsk)
## 
## Attaching package: 'tidycmprsk'
## 
## The following objects are masked from 'package:cmprsk':
## 
##     crr, cuminc
library(ggsurvfit)
library(gtsummary)
## 
## Attaching package: 'gtsummary'
## 
## The following object is masked from 'package:tidycmprsk':
## 
##     trial
#Ejemplo con melanoma
data(Melanoma, package = "MASS")
Melanoma <- 
  Melanoma %>% 
  mutate(
    status = as.factor(recode(status, `2` = 0, `1` = 1, `3` = 2))
  )
#Crear la variable para la supervivencia global
Melanoma$status1 = ifelse(Melanoma$status == 1, 1 ,0)

#Supervivencia global
# Calcular la superviviencia global
# Crear el objeto de supervivencia
surv_obj = Surv(time = Melanoma$time,
                event = Melanoma$status1)

# Ajustar el modelo de Kaplan-Meier
fit = survfit(surv_obj ~ 1,
              data = Melanoma)

# Generar el plot de Kaplan-Meier
g = ggsurvplot(fit,
               data = Melanoma,
               pval = FALSE,
               conf.int = FALSE,
               ggtheme = theme_bw(),
               palette = c("#76EEC6"),
               title = "Global Survival",
               xlab = "Days",
               ylab = "Survival Probability",
               xlim = c(0, max(Melanoma$time)))

#Añadir líneas verticales a los 0,1,3,5,10 y 15 años
g$plot = g$plot +
  geom_vline(xintercept = 0, color = "#7c7c7c", linetype = "dotted") +
  geom_vline(xintercept = 365, color = "#7c7c7c", linetype = "dotted") +
  geom_vline(xintercept = 1095, color = "#7c7c7c", linetype = "dotted") +
  geom_vline(xintercept = 1825, color = "#7c7c7c", linetype = "dotted") +
  geom_vline(xintercept = 3650, color = "#7c7c7c", linetype = "dotted") +
  geom_vline(xintercept = 5475, color = "#7c7c7c", linetype = "dotted")

g$plot = g$plot + scale_x_continuous(
  breaks = seq(from = 0, to = max(Melanoma$time), by = 365),
  expand = c(0,0) #Eliminas la expansión del eje x
)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
g

# Especificar los tiempos de interés
times = seq(from = 0, to = max(Melanoma$time), by = 365)

# Obtener el summary para esos tiempos específicos
summary_fit = summary(fit,
                      times = times)

# Convertir el summary a un dataframe
df_summary = data.frame(
  time = summary_fit$time,
  n.risk = summary_fit$n.risk,
  n.event = summary_fit$n.event,
  survival = round(summary_fit$surv, 2),
  lower = round(summary_fit$lower,2),
  upper = round(summary_fit$upper,2)
)

#Lo presentamos en una tabla
df_summary %>% kable(format = "html") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE, position = "center", font_size = 12) %>%
  column_spec(1, bold = TRUE, color = "#FF7F00") %>%
  row_spec(0, background = "#993155", color = "white")
time n.risk n.event survival lower upper
0 205 0 1.00 1.00 1.00
365 193 6 0.97 0.95 0.99
730 183 9 0.92 0.89 0.96
1095 167 15 0.85 0.80 0.90
1460 160 6 0.82 0.77 0.87
1825 122 9 0.77 0.71 0.83
2190 83 5 0.73 0.67 0.80
2555 64 3 0.70 0.63 0.77
2920 55 2 0.68 0.61 0.76
3285 38 1 0.66 0.59 0.75
3650 23 1 0.64 0.57 0.74
4015 12 0 0.64 0.57 0.74
4380 7 0 0.64 0.57 0.74
4745 2 0 0.64 0.57 0.74
5110 1 0 0.64 0.57 0.74
5475 1 0 0.64 0.57 0.74
# Incidencia acumulada para riesgos competitivos
# Convertir status en un factor con niveles y etiquetas específicas
# No es bueno cambiar el nombre porque me he dado cuenta que luego cuminc lee en orden alfabético
Melanoma$status = factor(Melanoma$status,
                         levels = c(0, 1, 2),
                         labels = c("Censored", "Dead from melanoma", "Dead from other cause"))

#Obtener las frecuencias de cada categoría y convertirla en un dataframe
frequencies_df = as.data.frame(table(Melanoma$status))

# Renombrar las columnas para mayor claridad
names(frequencies_df) = c("Status", "Frequency")

#Mostrar el dataframe
print(frequencies_df)
##                  Status Frequency
## 1              Censored       134
## 2    Dead from melanoma        57
## 3 Dead from other cause        14
#Ver el máximo de tiempo del registro
max(Melanoma$time)
## [1] 5565
#la función no me deja sin especificar el paquete
cum_inc = cmprsk :: cuminc(ftime = Melanoma$time,
                           fstatus = Melanoma$status,
                           cencode = 0)

length(cum_inc)
## [1] 3
#Gráfico de las incidencias acumuladas para los tres eventos
nombres_eventos = c("Censurado", "Muerte por melanoma", "Muerte por otra causa")

incidencia_dfs =  lapply(1:length(cum_inc), function(i){
  evento_actual = cum_inc[[i]]
  incidencia_df = data.frame(
    Time = evento_actual$time,
    Incidencia = evento_actual$est,
    Days = evento_actual$time,
    Evento = nombres_eventos[i]
  )
  return(incidencia_df)
})

#Unir todos los dataframes
incidencia_final = do.call(rbind, incidencia_dfs)

# Filtrar eventos específicos
incidencia_final_filtrado = incidencia_final %>%
  filter(Evento %in% c("Censurado", "Muerte por melanoma", "Muerte por otra causa"))

#Asegurarse de que Evento es un factor
incidencia_final_filtrado$Evento = as.factor(incidencia_final_filtrado$Evento)

#Colores para el evento
colores_eventos = c("Censurado" = "black", "Muerte por melanoma" = "#8A2BA2", "Muerte por otra causa" = "#8A8EE8")

#Crear el plot
g = ggplot(incidencia_final_filtrado, aes(x=Days, y = Incidencia, colour = Evento))+
  geom_step()+
  scale_x_continuous(limits = c(0, 5565))+
  scale_y_continuous(limits = c(0,1))+
  scale_colour_manual(values = colores_eventos, name = "Tipo evento")+
  labs(
    title = "Incidencia acumulada",
    subtitle = "Análisis por días",
    x = "Días",
    y = "Incidencia acumulada"
  )+
  theme_minimal()+
  theme(
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    plot.title = element_text(size = 12, face = "bold"),
    plot.subtitle = element_text(size = 10)
  )
#Mostrar el plot
g

#Calcular la incidencia acumulada
cuminc(Surv(time, status) ~ 1, data = Melanoma)
## 
## ── cuminc() ────────────────────────────────────────────────────────────────────
## • Failure type "Dead from melanoma"
## time    n.risk   estimate   std.error   95% CI          
## 1,000   171      0.127      0.023       0.086, 0.177    
## 2,000   103      0.230      0.030       0.174, 0.291    
## 3,000   54       0.310      0.037       0.239, 0.383    
## 4,000   13       0.339      0.041       0.260, 0.419    
## 5,000   1        0.339      0.041       0.260, 0.419
## • Failure type "Dead from other cause"
## time    n.risk   estimate   std.error   95% CI          
## 1,000   171      0.034      0.013       0.015, 0.066    
## 2,000   103      0.050      0.016       0.026, 0.087    
## 3,000   54       0.058      0.017       0.030, 0.099    
## 4,000   13       0.106      0.032       0.053, 0.179    
## 5,000   1        0.106      0.032       0.053, 0.179
#Realizar el gráfico para el evento de estudio
cuminc(Surv(time, status) ~ 1, data = Melanoma) %>% 
  ggcuminc() + 
  labs(
    x = "Days"
  ) + 
  add_confidence_interval() +
  add_risktable()
## Plotting outcome "Dead from melanoma".

# Realizar el gráfico para los dos eventos
cuminc(Surv(time, status) ~ 1, data = Melanoma) %>% 
  ggcuminc(outcome = c("Dead from melanoma", "Dead from other cause")) + 
  ylim(c(0,1))+
  labs(
    x = "Days"
  ) + 
  add_confidence_interval() +
  add_risktable()

# Incidencia acumulada según la presencia o no de úlcera
cuminc(Surv(time, status) ~ ulcer, data = Melanoma) %>% 
  tbl_cuminc(
    times = 1826.25, 
    label_header = "**{time/365.25}-year cuminc**") %>% 
  add_p()
Characteristic 5-year cuminc p-value1
ulcer
<0.001
    0 9.1% (4.6%, 15%)
    1 39% (29%, 49%)
1 Gray’s Test
# Gráfico de la presencia o no de úlcera
cuminc(Surv(time, status) ~ ulcer, data = Melanoma) %>% 
  ggcuminc() + 
  labs(
    x = "Days"
  ) + 
  add_confidence_interval() +
  add_risktable()
## Plotting outcome "Dead from melanoma".
## Warning: Removed 4 rows containing missing values (`geom_step()`).

# Regresión de Gray para riesgos competitivos
crr(Surv(time, status) ~ sex + age, data = Melanoma) %>%
  tbl_regression(exp = T)
Characteristic HR1 95% CI1 p-value
sex 1.80 1.06, 3.07 0.030
age 1.01 0.99, 1.03 0.2
1 HR = Hazard Ratio, CI = Confidence Interval