library(survival)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Inline sample data, one person per row: sex (F/M), birth date and death
# date, both written as YYYY/MM/DD and separated by whitespace.
# The birth date on the final row ("19/36/04/11") does not follow that format.
txt <- "
Sexo Nacimiento Muerte
F 1929/03/09 2019/07/06
F 1900/10/08 1959/11/15
M 1945/03/05 2015/12/25
F 1927/01/22 2008/08/16
M 1920/08/04 1986/01/22
F 1958/10/13 2019/09/19
F 1948/08/09 2015/07/29
F 1934/02/13 2015/08/20
F 1937/03/24 2001/01/12
M 1957/04/05 2005/12/22
M 1960/01/17 2010/08/10
M 1926/02/26 1996/04/25
M 1923/04/13 1937/02/11
F 1895/07/18 1992/12/23
M 1976/10/28 2018/05/30
M 1917/08/16 2013/05/04
F 1935/03/31 2006/07/31
F 1926/03/23 1993/07/13
M 1959/08/11 1980/02/17
M 1981/10/10 2011/02/18
F 1930/01/01 2007/02/24
M 1932/11/30 1996/06/30
M 19/36/04/11 2004/09/27
"
# Read the whitespace-separated inline table into a data frame.
raw <- read.table(text = txt, header = TRUE, fill = TRUE)

# Parse both date columns (YYYY/MM/DD). Strings that do not match the format,
# such as "19/36/04/11", become NA.
raw <- raw %>%
  mutate(
    Nacimiento = as.Date(Nacimiento, format = "%Y/%m/%d"),
    Muerte = as.Date(Muerte, format = "%Y/%m/%d")
  ) %>%
  # Drop rows with impossible birth dates. A length check on the raw string is
  # not enough ("19/36/04/11" has 11 characters yet is not a valid date), so
  # the test is made on the parsed value instead.
  filter(!is.na(Nacimiento))
raw
## Sexo Nacimiento Muerte
## 1 F 1929-03-09 2019-07-06
## 2 F 1900-10-08 1959-11-15
## 3 M 1945-03-05 2015-12-25
## 4 F 1927-01-22 2008-08-16
## 5 M 1920-08-04 1986-01-22
## 6 F 1958-10-13 2019-09-19
## 7 F 1948-08-09 2015-07-29
## 8 F 1934-02-13 2015-08-20
## 9 F 1937-03-24 2001-01-12
## 10 M 1957-04-05 2005-12-22
## 11 M 1960-01-17 2010-08-10
## 12 M 1926-02-26 1996-04-25
## 13 M 1923-04-13 1937-02-11
## 14 F 1895-07-18 1992-12-23
## 15 M 1976-10-28 2018-05-30
## 16 M 1917-08-16 2013-05-04
## 17 F 1935-03-31 2006-07-31
## 18 F 1926-03-23 1993-07-13
## 19 M 1959-08-11 1980-02-17
## 20 M 1981-10-10 2011-02-18
## 21 F 1930-01-01 2007-02-24
## 22 M 1932-11-30 1996-06-30

# Age at death in days (death date minus birth date).
raw$edad_muerte <- as.numeric(
  difftime(raw$Muerte, raw$Nacimiento, units = "days")
)

# Survival object: the time is the age at death, and a row counts as an event
# when a death date is present.
# NOTE(review): a row without a death date would also have an NA time, so it
# would be excluded from the fit rather than treated as censored -- confirm
# whether a follow-up date is needed for such rows.
supervivencia <- Surv(
  time = raw$edad_muerte,
  event = !is.na(raw$Muerte)
)

# Survival curve estimate for the whole sample (no grouping variable).
km_fit <- survfit(supervivencia ~ 1)
summary(km_fit)
## Call: survfit(formula = supervivencia ~ 1)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 5053 22 1 0.9545 0.0444 0.8714 1.000
## 7495 21 1 0.9091 0.0613 0.7966 1.000
## 10723 20 1 0.8636 0.0732 0.7315 1.000
## 15189 19 1 0.8182 0.0822 0.6719 0.996
## 17793 18 1 0.7727 0.0893 0.6160 0.969
## 18468 17 1 0.7273 0.0950 0.5631 0.939
## 21587 16 1 0.6818 0.0993 0.5125 0.907
## 22256 15 1 0.6364 0.1026 0.4640 0.873
## 23223 14 1 0.5909 0.1048 0.4174 0.837
## 23305 13 1 0.5455 0.1062 0.3725 0.799
## 23912 12 1 0.5000 0.1066 0.3292 0.759
## 24460 11 1 0.4545 0.1062 0.2876 0.718
## 24584 10 1 0.4091 0.1048 0.2476 0.676
## 25626 9 1 0.3636 0.1026 0.2092 0.632
## 25862 8 1 0.3182 0.0993 0.1726 0.587
## 26055 7 1 0.2727 0.0950 0.1378 0.540
## 28178 6 1 0.2273 0.0893 0.1052 0.491
## 29773 5 1 0.1818 0.0822 0.0749 0.441
## 29792 4 1 0.1364 0.0732 0.0476 0.390
## 32991 3 1 0.0909 0.0613 0.0243 0.341
## 34960 2 1 0.0455 0.0444 0.0067 0.308
## 35587 1 1 0.0000 NaN NA NA
# Draw the fitted survival curve; the x axis is the age at death in days.
plot(
  km_fit,
  xlab = "Días",
  ylab = "S(t)",
  main = "Curva de Supervivencia"
)
