library(survival)
library(survminer)
library(tidyverse)
library(dplyr)
library(magrittr)
crudos <- read_csv("Kudos to DXY.cn Last update_ 03_13_2020,  8_00 PM (EST).csv", col_types = cols())
glimpse(crudos, width = 80)
Observations: 3,397
Variables: 22
$ id                     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1...
$ case_in_country        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
$ `reporting date`       <chr> "1/20/2020", "1/20/2020", "1/21/2020", "1/21...
$ location               <chr> "Shenzhen, Guangdong", "Shanghai", "Zhejiang...
$ country                <chr> "China", "China", "China", "China", "China",...
$ gender                 <chr> "male", "female", "male", "female", "male", ...
$ age                    <dbl> 66, 56, 46, 60, 58, 44, 34, 37, 39, 56, 18, ...
$ symptom_onset          <chr> "1/3/2020", "1/15/2020", "1/4/2020", NA, NA,...
$ If_onset_approximated  <dbl> 0, 0, 0, NA, NA, 0, 0, 0, 0, 0, 0, 0, NA, 0,...
$ hosp_visit_date        <chr> "1/11/2020", "1/15/2020", "1/17/2020", "1/19...
$ international_traveler <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
$ domestic_traveler      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
$ exposure_start         <chr> "12/29/2019", NA, NA, NA, NA, NA, NA, "1/10/...
$ exposure_end           <chr> "1/4/2020", "1/12/2020", "1/3/2020", NA, NA,...
$ traveler               <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
$ `visiting Wuhan`       <dbl> 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,...
$ `from Wuhan`           <dbl> 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1,...
$ death                  <chr> "0", "0", "0", "0", "0", "0", "0", "0", "0",...
$ recovered              <chr> "0", "0", "0", "0", "0", "0", "0", "0", "0",...
$ symptom                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
$ source                 <chr> "Shenzhen Municipal Health Commission", "Off...
$ link                   <chr> "http://wjw.sz.gov.cn/wzx/202001/t20200120_1...
age_bins <- c('20-50', '51-80', '81+')
crudos$age_bin <- cut_interval(crudos$age, n = 3, closed = 'right', labels = age_bins)
datos_snulos <- crudos %>% rename(reporting.date = `reporting date`) #%>%
  #mutate(symptom_onset = coalesce(symptom_onset, hosp_visit_date)) #%>%
  #mutate(symptom_onset = coalesce(symptom_onset, reporting.date))
cols_de_interes <- datos_snulos %>% select(id, death, symptom_onset, age_bin, symptom, gender)
muertes_binario <- cols_de_interes %>% mutate(muerte = ifelse(death == 0, 0, 1)) # 1/0 para muertes ya que en algunos casos tenemos fechas.
fechas_muerte <- muertes_binario %>% filter(death != 1) # nos quedamos con ceros o fechas.
fechas_muerte$death <- as.character(fechas_muerte$death)
fechas_muerte <- fechas_muerte %>% mutate(fecha_deceso = if_else(muerte == 0, "06/31/2020", death)) # fifelse preserva el tipo de dato de la columna.
fechas_muerte$fecha_deceso <- as.Date(fechas_muerte$fecha_deceso, format="%m/%d/%Y")
fechas_muerte$symptom_onset <- as.Date(fechas_muerte$symptom_onset, format="%m/%d/%Y")
fechas_muerte$tiempo <- as.integer(fechas_muerte$fecha_deceso - fechas_muerte$symptom_onset)
fechas_limpio <- fechas_muerte %>%
  drop_na(tiempo) %>%
  filter(muerte == 1 & tiempo > 0)
sobrevive <- fechas_muerte %>%
  filter(muerte == 0) 
sobrevive_muestra <- sobrevive[sample(nrow(sobrevive), nrow(fechas_limpio)), ] %>%
  drop_na(tiempo)
datos_analisis <- rbind(fechas_limpio, sobrevive_muestra)  %>%
  filter(tiempo >= 5)
summary(datos_analisis$tiempo)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      7      13      20      18      23      27 
ggplot(data = datos_analisis, aes(x = tiempo)) +
  geom_density() + 
  labs(title = "PDF Tiempos")

modelo <- survfit(Surv(tiempo, muerte) ~ 1, data = datos_analisis)
ggsurvplot(
  modelo,
  #add.all = TRUE,
  title = "Curva de Supervivencia Kaplan-Meier",
  subtitle = "Fecha Síntomas vs Fecha Deceso",
  xlab = "Días desde Primer Sintoma", 
  ylab = "Prop. Supervivencia",
  risk.table.title = "Casos en Riesgo",
  #legend.title = "Rango Edad",
  risk.table = TRUE,
  tables.height = 0.25,
  surv.median.line = "hv",
  tables.theme = theme_cleantable(),
  #palette = c("#E7B800", "#2E9FDF", "#2E8FDF"),
  ggtheme = theme_bw(),
  pval = TRUE,
  conf.int = TRUE
  #ncensor.plot = TRUE
  #legend.labs = c("rango_1 = 51-80", "rango_2 = 81+")
) 
There are no survival curves to be compared. 
 This is a null model.

Funcion de Supervivencia

\[ \hat{S}(t)=\prod_{t_{i}<t} \frac{n_{i}-d_{i}}{n_{i}} \] Donde \(n_{i}\) es la cantidad de casos en riesgo antes del tiempo \(t_{i}\).
\(d_{i}\) son los decesos en el tiempo \(t_{i}\).

LS0tDQp0aXRsZTogIkFuYWxpc2lzIGRlIFN1cGVydml2ZW5jaWEgQ09WSUQtMTkiDQpoZWFkZXItaW5jbHVkZXM6DQogICAtIFx1c2VwYWNrYWdle2JibX0NCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCmBgYHtyfQ0KbGlicmFyeShzdXJ2aXZhbCkNCmxpYnJhcnkoc3Vydm1pbmVyKQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShtYWdyaXR0cikNCmBgYA0KDQpgYGB7cn0NCmNydWRvcyA8LSByZWFkX2NzdigiS3Vkb3MgdG8gRFhZLmNuIExhc3QgdXBkYXRlXyAwM18xM18yMDIwLCAgOF8wMCBQTSAoRVNUKS5jc3YiLCBjb2xfdHlwZXMgPSBjb2xzKCkpDQpnbGltcHNlKGNydWRvcywgd2lkdGggPSA4MCkNCmBgYA0KDQpgYGB7cn0NCmFnZV9iaW5zIDwtIGMoJzIwLTUwJywgJzUxLTgwJywgJzgxKycpDQpjcnVkb3MkYWdlX2JpbiA8LSBjdXRfaW50ZXJ2YWwoY3J1ZG9zJGFnZSwgbiA9IDMsIGNsb3NlZCA9ICdyaWdodCcsIGxhYmVscyA9IGFnZV9iaW5zKQ0KDQpkYXRvc19zbnVsb3MgPC0gY3J1ZG9zICU+JSByZW5hbWUocmVwb3J0aW5nLmRhdGUgPSBgcmVwb3J0aW5nIGRhdGVgKSAjJT4lDQogICNtdXRhdGUoc3ltcHRvbV9vbnNldCA9IGNvYWxlc2NlKHN5bXB0b21fb25zZXQsIGhvc3BfdmlzaXRfZGF0ZSkpICMlPiUNCiAgI211dGF0ZShzeW1wdG9tX29uc2V0ID0gY29hbGVzY2Uoc3ltcHRvbV9vbnNldCwgcmVwb3J0aW5nLmRhdGUpKQ0KDQpjb2xzX2RlX2ludGVyZXMgPC0gZGF0b3Nfc251bG9zICU+JSBzZWxlY3QoaWQsIGRlYXRoLCBzeW1wdG9tX29uc2V0LCBhZ2VfYmluLCBzeW1wdG9tLCBnZW5kZXIpDQptdWVydGVzX2JpbmFyaW8gPC0gY29sc19kZV9pbnRlcmVzICU+JSBtdXRhdGUobXVlcnRlID0gaWZlbHNlKGRlYXRoID09IDAsIDAsIDEpKSAjIDEvMCBwYXJhIG11ZXJ0ZXMgeWEgcXVlIGVuIGFsZ3Vub3MgY2Fzb3MgdGVuZW1vcyBmZWNoYXMuDQpmZWNoYXNfbXVlcnRlIDwtIG11ZXJ0ZXNfYmluYXJpbyAlPiUgZmlsdGVyKGRlYXRoICE9IDEpICMgbm9zIHF1ZWRhbW9zIGNvbiBjZXJvcyBvIGZlY2hhcy4NCg0KZmVjaGFzX211ZXJ0ZSRkZWF0aCA8LSBhcy5jaGFyYWN0ZXIoZmVjaGFzX211ZXJ0ZSRkZWF0aCkNCmZlY2hhc19tdWVydGUgPC0gZmVjaGFzX211ZXJ0ZSAlPiUgbXV0YXRlKGZlY2hhX2RlY2VzbyA9IGlmX2Vsc2UobXVlcnRlID09IDAsICIwNi8zMS8yMDIwIiwgZGVhdGgpKSAjIGZpZmVsc2UgcHJlc2VydmEgZWwgdGlwbyBkZSBkYXRvIGRlIGxhIGNvbHVtbmEuDQoNCmZlY2hhc19tdWVydGUkZmVjaGFfZGVjZXNvIDwtIGFzLkRhdGUoZmVjaGFzX211ZXJ0ZSRmZWNoYV9kZWNlc28sIGZvcm1hdD0iJW0vJWQvJVkiKQ0KZmVjaGFzX211ZXJ0ZSRzeW1wdG9tX29uc2V0IDwtIGFzLkRhdGUoZmVjaGFzX211ZXJ0ZSRzeW1wdG9tX29uc2V0LCBmb3JtYXQ9IiVtLyVkLyVZIikNCmZlY2hhc19tdWVydGUkdGllbXBvIDwtIGFzLmludGVnZXIoZmVjaGFzX211ZXJ0ZSRmZWNoYV9kZWNlc28gLSBmZWNoYXNfbXVlcnRlJHN5bXB0b21fb25zZXQpDQoNCmZlY2hhc19saW1waW8gPC0gZmVjaGFzX211ZXJ0ZSAlPiUNCiAgZHJvcF9uYSh0aWVtcG8pICU+JQ0KICBmaWx0ZXIobXVlcnRlID09IDEgJiB0aWVtcG8gPiAwKQ0KDQpzb2JyZXZpdmUgPC0gZmVjaGFzX211ZXJ0ZSAlPiUNCiAgZmlsdGVyKG11ZXJ0ZSA9PSAwKSANCg0Kc29icmV2aXZlX211ZXN0cmEgPC0gc29icmV2aXZlW3NhbXBsZShucm93KHNvYnJldml2ZSksIG5yb3coZmVjaGFzX2xpbXBpbykpLCBdICU+JQ0KICBkcm9wX25hKHRpZW1wbykNCg0KZGF0b3NfYW5hbGlzaXMgPC0gcmJpbmQoZmVjaGFzX2xpbXBpbywgc29icmV2aXZlX211ZXN0cmEpICAlPiUNCiAgZmlsdGVyKHRpZW1wbyA+PSA1KQ0KDQpzdW1tYXJ5KGRhdG9zX2FuYWxpc2lzJHRpZW1wbykNCg0KZ2dwbG90KGRhdGEgPSBkYXRvc19hbmFsaXNpcywgYWVzKHggPSB0aWVtcG8pKSArDQogIGdlb21fZGVuc2l0eSgpICsgDQogIGxhYnModGl0bGUgPSAiUERGIFRpZW1wb3MiKQ0KYGBgDQoNCmBgYHtyfQ0KbW9kZWxvIDwtIHN1cnZmaXQoU3Vydih0aWVtcG8sIG11ZXJ0ZSkgfiAxLCBkYXRhID0gZGF0b3NfYW5hbGlzaXMpDQpnZ3N1cnZwbG90KA0KICBtb2RlbG8sDQogICNhZGQuYWxsID0gVFJVRSwNCiAgdGl0bGUgPSAiQ3VydmEgZGUgU3VwZXJ2aXZlbmNpYSBLYXBsYW4tTWVpZXIiLA0KICBzdWJ0aXRsZSA9ICJGZWNoYSBT7W50b21hcyB2cyBGZWNoYSBEZWNlc28iLA0KICB4bGFiID0gIkTtYXMgZGVzZGUgUHJpbWVyIFNpbnRvbWEiLCANCiAgeWxhYiA9ICJQcm9wLiBTdXBlcnZpdmVuY2lhIiwNCiAgcmlzay50YWJsZS50aXRsZSA9ICJDYXNvcyBlbiBSaWVzZ28iLA0KICAjbGVnZW5kLnRpdGxlID0gIlJhbmdvIEVkYWQiLA0KICByaXNrLnRhYmxlID0gVFJVRSwNCiAgdGFibGVzLmhlaWdodCA9IDAuMjUsDQogIHN1cnYubWVkaWFuLmxpbmUgPSAiaHYiLA0KICB0YWJsZXMudGhlbWUgPSB0aGVtZV9jbGVhbnRhYmxlKCksDQogICNwYWxldHRlID0gYygiI0U3QjgwMCIsICIjMkU5RkRGIiwgIiMyRThGREYiKSwNCiAgZ2d0aGVtZSA9IHRoZW1lX2J3KCksDQogIHB2YWwgPSBUUlVFLA0KICBjb25mLmludCA9IFRSVUUNCiAgI25jZW5zb3IucGxvdCA9IFRSVUUNCiAgI2xlZ2VuZC5sYWJzID0gYygicmFuZ29fMSA9IDUxLTgwIiwgInJhbmdvXzIgPSA4MSsiKQ0KKSANCmBgYA0KPGg0IGFsaWduPSJjZW50ZXIiPkZ1bmNpb24gZGUgU3VwZXJ2aXZlbmNpYTwvaDQ+DQokJCBcaGF0e1N9KHQpPVxwcm9kX3t0X3tpfTx0fSBcZnJhY3tuX3tpfS1kX3tpfX17bl97aX19ICQkDQpEb25kZSAkbl97aX0kIGVzIGxhIGNhbnRpZGFkIGRlIGNhc29zIGVuIHJpZXNnbyBhbnRlcyBkZWwgdGllbXBvICR0X3tpfSQuIDxicj4NCiRkX3tpfSQgc29uIGxvcyBkZWNlc29zIGVuIGVsIHRpZW1wbyAkdF97aX0kLg0KDQo=