Tarea.R

library(survival)
library(ggplot2)
library(cmprsk)
library(openxlsx)
library(kableExtra)
library(readxl)
library(survminer)

## Loading required package: ggpubr

## 
## Attaching package: 'survminer'

## The following object is masked from 'package:survival':
## 
##     myeloma

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(haven)
library(reshape2)

## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

library(lubridate)
library(tidycmprsk)

## 
## Attaching package: 'tidycmprsk'
## 
## The following objects are masked from 'package:cmprsk':
## 
##     crr, cuminc

library(ggsurvfit)
library(gtsummary)

## 
## Attaching package: 'gtsummary'
## 
## The following object is masked from 'package:tidycmprsk':
## 
##     trial

#Ejemplo con melanoma
data(Melanoma, package = "MASS")
Melanoma <- 
  Melanoma %>% 
  mutate(
    status = as.factor(recode(status, `2` = 0, `1` = 1, `3` = 2))
  )
#Crear la variable para la supervivencia global
Melanoma$status1 = ifelse(Melanoma$status == 1, 1 ,0)

#Supervivencia global
# Calcular la superviviencia global
# Crear el objeto de supervivencia
surv_obj = Surv(time = Melanoma$time,
                event = Melanoma$status1)

# Ajustar el modelo de Kaplan-Meier
fit = survfit(surv_obj ~ 1,
              data = Melanoma)

# Generar el plot de Kaplan-Meier
g = ggsurvplot(fit,
               data = Melanoma,
               pval = FALSE,
               conf.int = FALSE,
               ggtheme = theme_bw(),
               palette = c("#76EEC6"),
               title = "Global Survival",
               xlab = "Days",
               ylab = "Survival Probability",
               xlim = c(0, max(Melanoma$time)))

#Añadir líneas verticales a los 0,1,3,5,10 y 15 años
g$plot = g$plot +
  geom_vline(xintercept = 0, color = "#7c7c7c", linetype = "dotted") +
  geom_vline(xintercept = 365, color = "#7c7c7c", linetype = "dotted") +
  geom_vline(xintercept = 1095, color = "#7c7c7c", linetype = "dotted") +
  geom_vline(xintercept = 1825, color = "#7c7c7c", linetype = "dotted") +
  geom_vline(xintercept = 3650, color = "#7c7c7c", linetype = "dotted") +
  geom_vline(xintercept = 5475, color = "#7c7c7c", linetype = "dotted")

g$plot = g$plot + scale_x_continuous(
  breaks = seq(from = 0, to = max(Melanoma$time), by = 365),
  expand = c(0,0) #Eliminas la expansión del eje x
)

## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

# Especificar los tiempos de interés
times = seq(from = 0, to = max(Melanoma$time), by = 365)

# Obtener el summary para esos tiempos específicos
summary_fit = summary(fit,
                      times = times)

# Convertir el summary a un dataframe
df_summary = data.frame(
  time = summary_fit$time,
  n.risk = summary_fit$n.risk,
  n.event = summary_fit$n.event,
  survival = round(summary_fit$surv, 2),
  lower = round(summary_fit$lower,2),
  upper = round(summary_fit$upper,2)
)

#Lo presentamos en una tabla
df_summary %>% kable(format = "html") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE, position = "center", font_size = 12) %>%
  column_spec(1, bold = TRUE, color = "#FF7F00") %>%
  row_spec(0, background = "#993155", color = "white")

time	n.risk	n.event	survival	lower	upper
0	205	0	1.00	1.00	1.00
365	193	6	0.97	0.95	0.99
730	183	9	0.92	0.89	0.96
1095	167	15	0.85	0.80	0.90
1460	160	6	0.82	0.77	0.87
1825	122	9	0.77	0.71	0.83
2190	83	5	0.73	0.67	0.80
2555	64	3	0.70	0.63	0.77
2920	55	2	0.68	0.61	0.76
3285	38	1	0.66	0.59	0.75
3650	23	1	0.64	0.57	0.74
4015	12	0	0.64	0.57	0.74
4380	7	0	0.64	0.57	0.74
4745	2	0	0.64	0.57	0.74
5110	1	0	0.64	0.57	0.74
5475	1	0	0.64	0.57	0.74

# Incidencia acumulada para riesgos competitivos
# Convertir status en un factor con niveles y etiquetas específicas
# No es bueno cambiar el nombre porque me he dado cuenta que luego cuminc lee en orden alfabético
Melanoma$status = factor(Melanoma$status,
                         levels = c(0, 1, 2),
                         labels = c("Censored", "Dead from melanoma", "Dead from other cause"))

#Obtener las frecuencias de cada categoría y convertirla en un dataframe
frequencies_df = as.data.frame(table(Melanoma$status))

# Renombrar las columnas para mayor claridad
names(frequencies_df) = c("Status", "Frequency")

#Mostrar el dataframe
print(frequencies_df)

##                  Status Frequency
## 1              Censored       134
## 2    Dead from melanoma        57
## 3 Dead from other cause        14

#Ver el máximo de tiempo del registro
max(Melanoma$time)

## [1] 5565

#la función no me deja sin especificar el paquete
cum_inc = cmprsk :: cuminc(ftime = Melanoma$time,
                           fstatus = Melanoma$status,
                           cencode = 0)

length(cum_inc)

## [1] 3

#Gráfico de las incidencias acumuladas para los tres eventos
nombres_eventos = c("Censurado", "Muerte por melanoma", "Muerte por otra causa")

incidencia_dfs =  lapply(1:length(cum_inc), function(i){
  evento_actual = cum_inc[[i]]
  incidencia_df = data.frame(
    Time = evento_actual$time,
    Incidencia = evento_actual$est,
    Days = evento_actual$time,
    Evento = nombres_eventos[i]
  )
  return(incidencia_df)
})

#Unir todos los dataframes
incidencia_final = do.call(rbind, incidencia_dfs)

# Filtrar eventos específicos
incidencia_final_filtrado = incidencia_final %>%
  filter(Evento %in% c("Censurado", "Muerte por melanoma", "Muerte por otra causa"))

#Asegurarse de que Evento es un factor
incidencia_final_filtrado$Evento = as.factor(incidencia_final_filtrado$Evento)

#Colores para el evento
colores_eventos = c("Censurado" = "black", "Muerte por melanoma" = "#8A2BA2", "Muerte por otra causa" = "#8A8EE8")

#Crear el plot
g = ggplot(incidencia_final_filtrado, aes(x=Days, y = Incidencia, colour = Evento))+
  geom_step()+
  scale_x_continuous(limits = c(0, 5565))+
  scale_y_continuous(limits = c(0,1))+
  scale_colour_manual(values = colores_eventos, name = "Tipo evento")+
  labs(
    title = "Incidencia acumulada",
    subtitle = "Análisis por días",
    x = "Días",
    y = "Incidencia acumulada"
  )+
  theme_minimal()+
  theme(
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    plot.title = element_text(size = 12, face = "bold"),
    plot.subtitle = element_text(size = 10)
  )
#Mostrar el plot
g

#Calcular la incidencia acumulada
cuminc(Surv(time, status) ~ 1, data = Melanoma)

## 
## ── cuminc() ────────────────────────────────────────────────────────────────────

## • Failure type "Dead from melanoma"

## time    n.risk   estimate   std.error   95% CI          
## 1,000   171      0.127      0.023       0.086, 0.177    
## 2,000   103      0.230      0.030       0.174, 0.291    
## 3,000   54       0.310      0.037       0.239, 0.383    
## 4,000   13       0.339      0.041       0.260, 0.419    
## 5,000   1        0.339      0.041       0.260, 0.419

## • Failure type "Dead from other cause"

## time    n.risk   estimate   std.error   95% CI          
## 1,000   171      0.034      0.013       0.015, 0.066    
## 2,000   103      0.050      0.016       0.026, 0.087    
## 3,000   54       0.058      0.017       0.030, 0.099    
## 4,000   13       0.106      0.032       0.053, 0.179    
## 5,000   1        0.106      0.032       0.053, 0.179

#Realizar el gráfico para el evento de estudio
cuminc(Surv(time, status) ~ 1, data = Melanoma) %>% 
  ggcuminc() + 
  labs(
    x = "Days"
  ) + 
  add_confidence_interval() +
  add_risktable()

## Plotting outcome "Dead from melanoma".

# Realizar el gráfico para los dos eventos
cuminc(Surv(time, status) ~ 1, data = Melanoma) %>% 
  ggcuminc(outcome = c("Dead from melanoma", "Dead from other cause")) + 
  ylim(c(0,1))+
  labs(
    x = "Days"
  ) + 
  add_confidence_interval() +
  add_risktable()

# Incidencia acumulada según la presencia o no de úlcera
cuminc(Surv(time, status) ~ ulcer, data = Melanoma) %>% 
  tbl_cuminc(
    times = 1826.25, 
    label_header = "**{time/365.25}-year cuminc**") %>% 
  add_p()

Characteristic	5-year cuminc	p-value¹
ulcer		<0.001
0	9.1% (4.6%, 15%)
1	39% (29%, 49%)
¹ Gray’s Test

# Gráfico de la presencia o no de úlcera
cuminc(Surv(time, status) ~ ulcer, data = Melanoma) %>% 
  ggcuminc() + 
  labs(
    x = "Days"
  ) + 
  add_confidence_interval() +
  add_risktable()

## Plotting outcome "Dead from melanoma".

## Warning: Removed 4 rows containing missing values (`geom_step()`).

# Regresión de Gray para riesgos competitivos
crr(Surv(time, status) ~ sex + age, data = Melanoma) %>%
  tbl_regression(exp = T)

Characteristic	HR¹	95% CI¹	p-value
sex	1.80	1.06, 3.07	0.030
age	1.01	0.99, 1.03	0.2
¹ HR = Hazard Ratio, CI = Confidence Interval

Tarea.R

crisc

2024-01-30