Mod_Supervivencia

Gráficas y funciones empleadas para el documento “Modelos de supervivencia, aplicación al caso de COVID19 en México en 2019”

En este documento se describen los procedimientos para el ajuste de un modelos parámetrico de supervivencia.

Curvas de las funciones de supervivencia usando el estimador de Kaplan-Meier

# Estimación de KAPLAN-MEIER y función de riesgo acumulativa, total y por sexo ----
covid.table <-
  survfit(Surv(TIEMPO, CENSURA) ~ 1, data = Covid19_2020_data, conf.int = 0.95)

# S(t)
autoplot(covid.table, xlab = 'Tiempo dias', ylab = 'Probabilidad de supervivencia ')

# H(t)
autoplot(covid.table,
         xlab = 'Tiempo (días)',
         ylab = 'Probabilidad acumulada de riesgo ',
         fun = "cumhaz")

covid.table.02 <-
  survfit(Surv(TIEMPO, CENSURA) ~ SEXO_L, data = Covid19_2020_data)

datatable(surv_summary(covid.table.02))%>%
    formatRound(columns=c('surv', 'std.err', 'upper', 'lower'), digits=3)

# S(t)
autoplot(covid.table.02,
         xlab = 'Tiempo días',
         ylab = 'Probabilidad de supervivencia ',
         lty = 1:2)

# H(t)
autoplot(
  covid.table.02,
  xlab = 'Tiempo dias',
  ylab = 'Probabilidad acumulada de riesgo ',
  lty = 1:2,
  fun = "cumhaz"
)

# Prueba de log rangos para S(t) por sexo -----
Log.Rango<-survdiff(Surv(TIEMPO, CENSURA) ~ SEXO_L, data = Covid19_2020_data)

pander(Log.Rango)

Call: Surv(TIEMPO, CENSURA) ~ SEXO_L Chisq = 77.615187 on 1 degrees of freedom, p = 0.000000
	N	Observed	Expected	(O-E)^2/E	(O-E)^2/V
SEXO_L=HOMBRE	94797	94797	96351	25.06	77.62
SEXO_L=MUJER	54648	54648	53094	45.49	77.62

Curvas de supervivencia por estados

# Total de eventos por entidad ----
s <- table(Covid19_2020_data %>% dplyr::select(ENTIDAD_UM_NOM))

ggplot(as.data.frame(s),
       aes(
         x = reorder(ENTIDAD_UM_NOM, -Freq),
         y = Freq,
         fill = ENTIDAD_UM_NOM
       )) +
  geom_bar(stat = "identity") + theme(legend.position = " none") +
  labs(x = "Entidad de la Unida Médica", y = "Frecuencias") +
  geom_text(
    aes(label = Freq),
    size = 2.6,
    angle = 90,
    color = "black",
    position = position_dodge(width = 0.9),
    vjust = -0
  ) +
  theme(axis.text.x = element_text(
    angle = 90,
    vjust = 0.5,
    hjust = 1
  ))

# Gráficas de S(t) para cada entidad ----
covid.table.entidad <-
  survfit(Surv(TIEMPO, CENSURA) ~ ENTIDAD_UM_NOM, data = Covid19_2020_data)

covid.table.entidad <- broom::tidy(covid.table.entidad)

ggplot(data = covid.table.entidad, aes(x = time, y = estimate)) +
  geom_line() +
  xlab("Tiempo") + ylab("Prob. supervivencia S(t)") +
  facet_wrap(strata ~ .)

# Modelo de riesgos proporcionales de Cox ----
cox.regression <- coxph(
  Surv(TIEMPO, CENSURA) ~ EDAD + SEXO_L + DIABETES_L + OBESIDAD_L + TIPO_PACIENTE_L,
  data = as.data.frame(Covid19_2020_data)
)

# Gráfica de riesgos proporcionales 
ggforest(cox.regression, fontsize = 0.6, noDigits = 3)

stargazer(cox.regression,  type = "html")


	Dependent variable:

	TIEMPO

EDAD	0.003^***
	(0.0002)

SEXO_LMUJER	0.037^***
	(0.005)

DIABETES_LSÍ	0.090^***
	(0.005)

OBESIDAD_LSÍ	-0.009
	(0.006)

TIPO_PACIENTE_LHOSPITALIZADO	0.057^***
	(0.009)


Observations	148,266
R²	0.004
Max. Possible R²	1.000
Log Likelihood	-1,616,793.000
Wald Test	631.670^*** (df = 5)
LR Test	632.815^*** (df = 5)
Score (Logrank) Test	632.010^*** (df = 5)

Note:	p<0.1; p<0.05; p<0.01

#S(t|x)
autoplot(
  survfit(cox.regression),
  surv.linetype = 'dashed',
  surv.colour = 'blue',
  conf.int.fill = 'dodgerblue3',
  conf.int.alpha = 0.5,
  xlab = 'Tiempo (días)',
  ylab = 'Probabilidad de supervivencia S(t)  ',
  main = 'Modelos de Riesgos proporcionales de Cox'
)

# Prueba del supuesto de riesgos proporcionales----

cox.zph(cox.regression)

##                   chisq df       p
## EDAD              0.089  1    0.77
## SEXO_L           80.918  1 < 2e-16
## DIABETES_L       63.098  1 2.0e-15
## OBESIDAD_L       85.513  1 < 2e-16
## TIPO_PACIENTE_L  29.417  1 5.8e-08
## GLOBAL          280.547  5 < 2e-16

# Selección de variables usando el criterio de Akaike - Modelo de COX ----
modelo_nulo_cox <- coxph(
  Surv(TIEMPO, CENSURA) ~ 1,
  data = na.omit(Covid19_2020_data[,c("TIEMPO","CENSURA","EDAD","SEXO_L","DIABETES_L","OBESIDAD_L","TIPO_PACIENTE_L")])
)


# Selección por pasos
modelo_pasos_cox <- step(modelo_nulo_cox, scope = list(lower = modelo_nulo_cox, upper = cox.regression), direction = "forward")

Start: AIC=3234218 Surv(TIEMPO, CENSURA) ~ 1

              Df     AIC

DIABETES_L 1 3233886
EDAD 1 3233967
SEXO_L 1 3234145
TIPO_PACIENTE_L 1 3234173
OBESIDAD_L 1 3234217 3234218

Step: AIC=3233886 Surv(TIEMPO, CENSURA) ~ DIABETES_L

              Df     AIC

EDAD 1 3233674
SEXO_L 1 3233834
TIPO_PACIENTE_L 1 3233846
OBESIDAD_L 1 3233880 3233886

Step: AIC=3233674 Surv(TIEMPO, CENSURA) ~ DIABETES_L + EDAD

              Df     AIC

SEXO_L 1 3233632
TIPO_PACIENTE_L 1 3233638 3233674
OBESIDAD_L 1 3233676

Step: AIC=3233632 Surv(TIEMPO, CENSURA) ~ DIABETES_L + EDAD + SEXO_L

              Df     AIC

TIPO_PACIENTE_L 1 3233596
OBESIDAD_L 1 3233631 3233632

Step: AIC=3233596 Surv(TIEMPO, CENSURA) ~ DIABETES_L + EDAD + SEXO_L + TIPO_PACIENTE_L

         Df     AIC

OBESIDAD_L 1 3233595 3233596

Step: AIC=3233595 Surv(TIEMPO, CENSURA) ~ DIABETES_L + EDAD + SEXO_L + TIPO_PACIENTE_L + OBESIDAD_L

stargazer(modelo_pasos_cox, type = "html")


	Dependent variable:

	TIEMPO

DIABETES_LSÍ	0.090^***
	(0.005)

EDAD	0.003^***
	(0.0002)

SEXO_LMUJER	0.037^***
	(0.005)

TIPO_PACIENTE_LHOSPITALIZADO	0.057^***
	(0.009)

OBESIDAD_LSÍ	-0.009
	(0.006)


Observations	148,266
R²	0.004
Max. Possible R²	1.000
Log Likelihood	-1,616,793.000
Wald Test	631.670^*** (df = 5)
LR Test	632.815^*** (df = 5)
Score (Logrank) Test	632.010^*** (df = 5)

Note:	p<0.1; p<0.05; p<0.01

Regresión de Weibull

# Regresión de Weibull
weibull.regression <- survreg(
  Surv(TIEMPO, CENSURA) ~ EDAD + SEXO_L + DIABETES_L + OBESIDAD_L + TIPO_PACIENTE_L,
  data = Covid19_2020_data,
  dist = "weibull"
)
stargazer(weibull.regression, type= "html")


	Dependent variable:

	TIEMPO

EDAD	-0.002^***
	(0.0001)

SEXO_LMUJER	-0.019^***
	(0.003)

DIABETES_LSÍ	-0.054^***
	(0.003)

OBESIDAD_LSÍ	-0.001
	(0.004)

TIPO_PACIENTE_LHOSPITALIZADO	-0.058^***
	(0.006)

Constant	2.968^***
	(0.009)


Observations	148,266
Log Likelihood	-514,062.200
chi²	816.548^*** (df = 5)

Note:	p<0.1; p<0.05; p<0.01

# Extracción de los parámetros para trazar S(t) para la distribución de Weibull ----

lfit <- weibull.regression
b <- lfit$coefficients # coeficientes asociados a las covariables
s <- lfit$scale #recíproco del parámetro de gamma
l <- exp(-(b[1] + b[2] + b[3] + b[4] + b[5] + b[6]) / s) #lambda
g <- 1 / s #gamma parámetro de forma




# Curva de predicción de para un perfil fijo -----
# EDAD, SEXO, DIABETES, OBESIDAD y TIPO_PACIENTE
pct <- 1:100 / 100   # Percentiles
ptime <- predict(
  lfit,
  newdata = data.frame(
    EDAD = 50,
    SEXO_L = "HOMBRE",
    DIABETES_L = "SÍ",
    OBESIDAD_L = "SÍ",
    TIPO_PACIENTE_L = "HOSPITALIZADO"
  ),
  type = 'quantile',
  p = pct,
  se = TRUE
)

matplot(
  cbind(
    ptime$fit,
    ptime$fit + 2 * ptime$se.fit,
    ptime$fit - 2 * ptime$se.fit
  ),
  1 - pct,
  xlab = 'Tiempo (días)',
  ylab = 'Probabilidad de supervivencia S(t)  ',
  main = 'Modelos de Weibull\n EDAD=50, SEXO=HOMBRE,DIABETES=SÍ, OBESIDAD=SÍ,
            TIPO_PACIENTE=HOSPITALIZADO',
  type = 'l',
  lty = c(1, 2, 2),
  col = c("red", "blue", "blue")
)


# Gráfica parámetrica de S_0(t) (función de referencia) ----
T <- seq(0, 50, 0.1)
S0 <- exp(-l * T ^ g)
lines(T, S0, lty = 1, col = "green")


legend(
  "topright",
  legend = c("Curva predicción", "Curva paramétrica"),
  col = c("red", "green"),
  lty = 1:1,
  cex = 0.8
)

# Se consideran solo los casos con datos completos en las
# variables TIEMPO,EDAD,SEXO_L,DIABETES_L,OBESIDAD_L y TIPO_PACIENTE_L

cox_snell_resid <- na.omit(Covid19_2020_data[,c("TIEMPO","EDAD","SEXO_L","DIABETES_L","OBESIDAD_L","TIPO_PACIENTE_L")])$TIEMPO* exp(-predict(weibull.regression, type = "lp"))

km_fit_CSnell <- survfit(Surv(cox_snell_resid, rep(1,length(cox_snell_resid) )) ~ 1 )

plot(km_fit_CSnell, fun = "cumhaz", conf.int = FALSE, xlab = "Residuales Cox-Snell", ylab = "Riesgo acumulativo H(t)", main = "Residuales de Cox-Snell  para un modelo Weibull")
abline(0, 1, col = "red")

# NOTA La gráfica no muestra un ajuste adecuado a un modelo
#          de regresión de Weibull con las covariables asociadas.


# Selección de variables usando el criterio de Akaike - Modelo Weibull ----
modelo_nulo_weibull <- 
  survreg(
    Surv(TIEMPO, CENSURA) ~ 1,
    data = na.omit(Covid19_2020_data[,c("TIEMPO","CENSURA","EDAD","SEXO_L","DIABETES_L","OBESIDAD_L","TIPO_PACIENTE_L")]),
    dist = "weibull"
  )

# Selección por pasos
modelo_pasos_weibull <- step(modelo_nulo_weibull, scope = list(lower = modelo_nulo_weibull, upper = weibull.regression), direction = "forward")

Start: AIC=1028945 Surv(TIEMPO, CENSURA) ~ 1

              Df     AIC

EDAD 1 1028556
DIABETES_L 1 1028608
TIPO_PACIENTE_L 1 1028825
SEXO_L 1 1028882 1028945
OBESIDAD_L 1 1028947

Step: AIC=1028556 Surv(TIEMPO, CENSURA) ~ EDAD

              Df     AIC

DIABETES_L 1 1028271
TIPO_PACIENTE_L 1 1028445
SEXO_L 1 1028509
OBESIDAD_L 1 1028554 1028556

Step: AIC=1028271 Surv(TIEMPO, CENSURA) ~ EDAD + DIABETES_L

              Df     AIC

TIPO_PACIENTE_L 1 1028168
SEXO_L 1 1028238 1028271
OBESIDAD_L 1 1028272

Step: AIC=1028168 Surv(TIEMPO, CENSURA) ~ EDAD + DIABETES_L + TIPO_PACIENTE_L

         Df     AIC

SEXO_L 1 1028137 1028168
OBESIDAD_L 1 1028169

Step: AIC=1028137 Surv(TIEMPO, CENSURA) ~ EDAD + DIABETES_L + TIPO_PACIENTE_L + SEXO_L

         Df     AIC

1028137 + OBESIDAD_L 1 1028138

stargazer(modelo_pasos_weibull, type="html")


	Dependent variable:

	TIEMPO

EDAD	-0.002^***
	(0.0001)

DIABETES_LSÍ	-0.054^***
	(0.003)

TIPO_PACIENTE_LHOSPITALIZADO	-0.058^***
	(0.006)

SEXO_LMUJER	-0.019^***
	(0.003)

Constant	2.968^***
	(0.009)


Observations	148,266
Log Likelihood	-514,062.300
chi²	816.410^*** (df = 4)

Note:	p<0.1; p<0.05; p<0.01

Tabla de comparación del modelo de Cox y de Weibull

# Generación automática de tabla de regresión en LaTex.


stargazer(cox.regression, weibull.regression, type = "html")


	Dependent variable:

	TIEMPO
	Cox	Weibull
	prop. hazards
	(1)	(2)

EDAD	0.003^***	-0.002^***
	(0.0002)	(0.0001)

SEXO_LMUJER	0.037^***	-0.019^***
	(0.005)	(0.003)

DIABETES_LSÍ	0.090^***	-0.054^***
	(0.005)	(0.003)

OBESIDAD_LSÍ	-0.009	-0.001
	(0.006)	(0.004)

TIPO_PACIENTE_LHOSPITALIZADO	0.057^***	-0.058^***
	(0.009)	(0.006)

Constant		2.968^***
		(0.009)


Observations	148,266	148,266
R²	0.004
Max. Possible R²	1.000
Log Likelihood	-1,616,793.000	-514,062.200
chi²		816.548^*** (df = 5)
Wald Test	631.670^*** (df = 5)
LR Test	632.815^*** (df = 5)
Score (Logrank) Test	632.010^*** (df = 5)

Note:	p<0.1; p<0.05; p<0.01

Mod_Supervivencia_ejemplo

2026-04-06

Gráficas y funciones empleadas para el documento “Modelos de supervivencia, aplicación al caso de COVID19 en México en 2019”

Curvas de las funciones de supervivencia usando el estimador de Kaplan-Meier

Curvas de supervivencia por estados

Regresión de Weibull

Tabla de comparación del modelo de Cox y de Weibull