library(survival)
library(survminer)
library(tidyverse)
library(dplyr)
library(magrittr)
crudos <- read_csv("Kudos to DXY.cn Last update_ 03_13_2020, 8_00 PM (EST).csv", col_types = cols())
glimpse(crudos, width = 80)
Observations: 3,397
Variables: 22
$ id [3m[38;5;246m<dbl>[39m[23m 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1...
$ case_in_country [3m[38;5;246m<dbl>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
$ `reporting date` [3m[38;5;246m<chr>[39m[23m "1/20/2020", "1/20/2020", "1/21/2020", "1/21...
$ location [3m[38;5;246m<chr>[39m[23m "Shenzhen, Guangdong", "Shanghai", "Zhejiang...
$ country [3m[38;5;246m<chr>[39m[23m "China", "China", "China", "China", "China",...
$ gender [3m[38;5;246m<chr>[39m[23m "male", "female", "male", "female", "male", ...
$ age [3m[38;5;246m<dbl>[39m[23m 66, 56, 46, 60, 58, 44, 34, 37, 39, 56, 18, ...
$ symptom_onset [3m[38;5;246m<chr>[39m[23m "1/3/2020", "1/15/2020", "1/4/2020", NA, NA,...
$ If_onset_approximated [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, NA, NA, 0, 0, 0, 0, 0, 0, 0, NA, 0,...
$ hosp_visit_date [3m[38;5;246m<chr>[39m[23m "1/11/2020", "1/15/2020", "1/17/2020", "1/19...
$ international_traveler [3m[38;5;246m<dbl>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
$ domestic_traveler [3m[38;5;246m<dbl>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
$ exposure_start [3m[38;5;246m<chr>[39m[23m "12/29/2019", NA, NA, NA, NA, NA, NA, "1/10/...
$ exposure_end [3m[38;5;246m<chr>[39m[23m "1/4/2020", "1/12/2020", "1/3/2020", NA, NA,...
$ traveler [3m[38;5;246m<dbl>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
$ `visiting Wuhan` [3m[38;5;246m<dbl>[39m[23m 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,...
$ `from Wuhan` [3m[38;5;246m<dbl>[39m[23m 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1,...
$ death [3m[38;5;246m<chr>[39m[23m "0", "0", "0", "0", "0", "0", "0", "0", "0",...
$ recovered [3m[38;5;246m<chr>[39m[23m "0", "0", "0", "0", "0", "0", "0", "0", "0",...
$ symptom [3m[38;5;246m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
$ source [3m[38;5;246m<chr>[39m[23m "Shenzhen Municipal Health Commission", "Off...
$ link [3m[38;5;246m<chr>[39m[23m "http://wjw.sz.gov.cn/wzx/202001/t20200120_1...
age_bins <- c('20-50', '51-80', '81+')
crudos$age_bin <- cut_interval(crudos$age, n = 3, closed = 'right', labels = age_bins)
datos_snulos <- crudos %>% rename(reporting.date = `reporting date`) #%>%
#mutate(symptom_onset = coalesce(symptom_onset, hosp_visit_date)) #%>%
#mutate(symptom_onset = coalesce(symptom_onset, reporting.date))
cols_de_interes <- datos_snulos %>% select(id, death, symptom_onset, age_bin, symptom, gender)
muertes_binario <- cols_de_interes %>% mutate(muerte = ifelse(death == 0, 0, 1)) # 1/0 para muertes ya que en algunos casos tenemos fechas.
fechas_muerte <- muertes_binario %>% filter(death != 1) # nos quedamos con ceros o fechas.
fechas_muerte$death <- as.character(fechas_muerte$death)
fechas_muerte <- fechas_muerte %>% mutate(fecha_deceso = if_else(muerte == 0, "06/31/2020", death)) # fifelse preserva el tipo de dato de la columna.
fechas_muerte$fecha_deceso <- as.Date(fechas_muerte$fecha_deceso, format="%m/%d/%Y")
fechas_muerte$symptom_onset <- as.Date(fechas_muerte$symptom_onset, format="%m/%d/%Y")
fechas_muerte$tiempo <- as.integer(fechas_muerte$fecha_deceso - fechas_muerte$symptom_onset)
fechas_limpio <- fechas_muerte %>%
drop_na(tiempo) %>%
filter(muerte == 1 & tiempo > 0)
sobrevive <- fechas_muerte %>%
filter(muerte == 0)
sobrevive_muestra <- sobrevive[sample(nrow(sobrevive), nrow(fechas_limpio)), ] %>%
drop_na(tiempo)
datos_analisis <- rbind(fechas_limpio, sobrevive_muestra) %>%
filter(tiempo >= 5)
summary(datos_analisis$tiempo)
Min. 1st Qu. Median Mean 3rd Qu. Max.
7 13 20 18 23 27
ggplot(data = datos_analisis, aes(x = tiempo)) +
geom_density() +
labs(title = "PDF Tiempos")
modelo <- survfit(Surv(tiempo, muerte) ~ 1, data = datos_analisis)
ggsurvplot(
modelo,
#add.all = TRUE,
title = "Curva de Supervivencia Kaplan-Meier",
subtitle = "Fecha Síntomas vs Fecha Deceso",
xlab = "Días desde Primer Sintoma",
ylab = "Prop. Supervivencia",
risk.table.title = "Casos en Riesgo",
#legend.title = "Rango Edad",
risk.table = TRUE,
tables.height = 0.25,
surv.median.line = "hv",
tables.theme = theme_cleantable(),
#palette = c("#E7B800", "#2E9FDF", "#2E8FDF"),
ggtheme = theme_bw(),
pval = TRUE,
conf.int = TRUE
#ncensor.plot = TRUE
#legend.labs = c("rango_1 = 51-80", "rango_2 = 81+")
)
There are no survival curves to be compared.
This is a null model.
\[ \hat{S}(t)=\prod_{t_{i}<t} \frac{n_{i}-d_{i}}{n_{i}} \] Donde \(n_{i}\) es la cantidad de casos en riesgo antes del tiempo \(t_{i}\).
\(d_{i}\) son los decesos en el tiempo \(t_{i}\).