Este es un ejercicio del curso de Bioestadística II de la Maestría en Epidemiología Clínica II. Se tomaron los datos de la base de datos “Ca Mama”
#Datos
##Carga de paquetes
if(!require(survival)) install.packages("survival")
if(!require(KMsurv)) install.packages("KMsurv")
if(!require(survMisc)) install.packages("survMisc")
if(!require(survminer)) install.packages("survminer")
if(!require(ggfortify)) install.packages("ggfortify")
if(!require(flexsurv)) install.packages("flexsurv")
if(!require(actuar)) install.packages("actuar")
if(!require(dplyr)) install.packages("dplyr")
if(!require(psych)) install.packages("psych")
if(!require(tidyverse)) install.packages("tidyverse")
##Carga de los datos
library(haven)
mama <- read_sav("ca mama-1.sav")
mama$lnpos <- factor(mama$lnpos)
mama$histgrad <-factor(mama$histgrad)
mama$er <- factor(mama$er)
mama$pr <- factor(mama$pr)
mama$pathscat <- factor(mama$pathscat)
mama$ln_yesno <- factor(mama$ln_yesno)
mama$pathscat2 <- factor(mama$pathscat2)
#Análisis básico de supervivencia
#Funcion de supervivencia por Kaplan Meier, general
mama.surv <- Surv(mama$time, mama$status)
mama.km <- survfit(mama.surv ~1, data = mama, type = "kaplan-meier") #Estimación Kaplan Meier
summary(mama.km)
## Call: survfit(formula = mama.surv ~ 1, data = mama, type = "kaplan-meier")
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 2.63 1207 1 0.999 0.000828 0.998 1.000
## 11.03 1089 1 0.998 0.001235 0.996 1.000
## 12.00 1076 1 0.997 0.001544 0.994 1.000
## 12.20 1071 1 0.996 0.001801 0.993 1.000
## 12.43 1066 1 0.995 0.002028 0.991 0.999
## 13.03 1053 1 0.995 0.002235 0.990 0.999
## 13.10 1049 1 0.994 0.002426 0.989 0.998
## 14.73 1028 1 0.993 0.002609 0.988 0.998
## 16.20 1004 1 0.992 0.002787 0.986 0.997
## 17.13 990 1 0.991 0.002959 0.985 0.996
## 18.10 969 1 0.990 0.003128 0.983 0.996
## 18.77 956 1 0.989 0.003291 0.982 0.995
## 19.83 942 1 0.988 0.003451 0.981 0.994
## 21.27 923 1 0.986 0.003609 0.979 0.994
## 21.77 919 1 0.985 0.003762 0.978 0.993
## 22.20 914 1 0.984 0.003909 0.977 0.992
## 23.57 884 1 0.983 0.004060 0.975 0.991
## 24.33 871 1 0.982 0.004209 0.974 0.990
## 24.63 867 1 0.981 0.004354 0.972 0.989
## 25.37 859 1 0.980 0.004496 0.971 0.989
## 25.43 856 1 0.979 0.004634 0.970 0.988
## 26.13 841 1 0.977 0.004773 0.968 0.987
## 26.60 836 1 0.976 0.004908 0.967 0.986
## 27.40 828 1 0.975 0.005042 0.965 0.985
## 28.33 809 1 0.974 0.005178 0.964 0.984
## 29.27 801 1 0.973 0.005312 0.962 0.983
## 29.53 796 1 0.971 0.005444 0.961 0.982
## 29.57 795 1 0.970 0.005573 0.959 0.981
## 30.23 785 1 0.969 0.005701 0.958 0.980
## 31.53 771 1 0.968 0.005831 0.956 0.979
## 33.47 753 1 0.966 0.005963 0.955 0.978
## 36.63 707 1 0.965 0.006109 0.953 0.977
## 36.87 704 1 0.964 0.006252 0.952 0.976
## 37.70 688 1 0.962 0.006398 0.950 0.975
## 39.50 661 1 0.961 0.006552 0.948 0.974
## 40.10 656 1 0.959 0.006704 0.946 0.973
## 40.17 653 1 0.958 0.006853 0.945 0.971
## 40.80 640 1 0.956 0.007004 0.943 0.970
## 41.27 631 1 0.955 0.007155 0.941 0.969
## 41.40 628 1 0.953 0.007303 0.939 0.968
## 41.47 626 1 0.952 0.007448 0.937 0.967
## 41.53 625 1 0.950 0.007591 0.936 0.965
## 42.43 609 2 0.947 0.007880 0.932 0.963
## 42.97 604 1 0.946 0.008021 0.930 0.962
## 43.50 598 1 0.944 0.008162 0.928 0.960
## 44.37 584 1 0.942 0.008307 0.926 0.959
## 45.13 577 1 0.941 0.008452 0.924 0.958
## 45.30 574 1 0.939 0.008594 0.923 0.956
## 46.47 559 1 0.938 0.008742 0.921 0.955
## 47.97 532 1 0.936 0.008901 0.918 0.953
## 50.67 498 1 0.934 0.009079 0.916 0.952
## 52.70 466 1 0.932 0.009278 0.914 0.950
## 53.50 453 1 0.930 0.009483 0.911 0.949
## 54.60 438 1 0.928 0.009696 0.909 0.947
## 56.23 421 1 0.925 0.009921 0.906 0.945
## 56.57 417 1 0.923 0.010142 0.904 0.943
## 59.03 384 1 0.921 0.010397 0.901 0.941
## 59.13 382 1 0.918 0.010645 0.898 0.940
## 62.53 338 1 0.916 0.010955 0.895 0.937
## 64.27 314 1 0.913 0.011302 0.891 0.935
## 66.00 300 1 0.910 0.011666 0.887 0.933
## 66.80 292 2 0.904 0.012391 0.880 0.928
## 73.03 241 1 0.900 0.012894 0.875 0.925
## 75.63 219 1 0.896 0.013474 0.870 0.922
## 76.13 214 1 0.892 0.014046 0.864 0.919
## 76.63 207 1 0.887 0.014623 0.859 0.916
## 80.47 181 1 0.882 0.015342 0.853 0.913
## 81.93 164 1 0.877 0.016164 0.846 0.909
## 83.27 152 1 0.871 0.017057 0.838 0.905
## 96.50 84 1 0.861 0.019756 0.823 0.900
#Gráfica de supervivencia, KM
ggsurvplot(fit = mama.km, data = mama, conf.int = T, title = "Curva de Supervivencia General",
xlab = "Tiempo (meses)", ylab = "Probabilidad de supervivencia", legend.title = "Estimación",
legend.labs = "Kaplan-Meier", surv.median.line = c("hv"), risk.table = T, isk.table.y.text.col = T, censor= F)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, :
## Median survival not reached.
ggsurvplot(mama.km, fun = "cumhaz", xlab = "Tiempo (meses)", censor = T,
ylab = "Riesgo Acumulado", title = "Riesgo Acumulado General")
#Gráficas por presencia de ER
mama.km.er <- survfit(mama.surv ~ er, data = mama, conf.type = "log-log")
ggsurvplot(mama.km.er, fun = "cumhaz", xlab = "Tiempo (días)", censor = T,
ylab = "Riesgo Acumulado", title = "Riesgo Acumulado por ER", legend.title = "Grado histologico")
ggsurvplot(mama.km.er, xlab = "Tiempo (días)", censor = T,
ylab = "Supervivencia", title = "Curva de Supervivencia por ER", legend.title = "Grado histologico", risk.table= T)
#Gráficas por presencia de PR
mama.km.pr <- survfit(mama.surv ~ pr, data = mama, conf.type = "log-log")
ggsurvplot(mama.km.pr, fun = "cumhaz", xlab = "Tiempo (días)", censor = T,
ylab = "Riesgo Acumulado", title = "Riesgo Acumulado por PR", legend.title = "Grado histologico")
ggsurvplot(mama.km.pr, xlab = "Tiempo (días)", censor = T,
ylab = "Supervivencia", title = "Curva de Supervivencia por PR", legend.title = "Grado histologico", risk.table= T)
#Gráficas por presencia de Path2
mama.km.path2 <- survfit(mama.surv ~ pathscat2, data = mama, conf.type = "log-log")
ggsurvplot(mama.km.path2, fun = "cumhaz", xlab = "Tiempo (días)", censor = T,
ylab = "Riesgo Acumulado", title = "Riesgo Acumulado por Path2", legend.title = "Grado histologico")
ggsurvplot(mama.km.path2, xlab = "Tiempo (días)", censor = T,
ylab = "Supervivencia", title = "Curva de Supervivencia por path2", legend.title = "Grado histologico", risk.table= T)
#Gráficas por presencia de lnyn
mama.km.ln2 <- survfit(mama.surv ~ ln_yesno, data = mama, conf.type = "log-log")
ggsurvplot(mama.km.ln2, fun = "cumhaz", xlab = "Tiempo (días)", censor = T,
ylab = "Riesgo Acumulado", title = "Riesgo Acumulado por Invasión linfatica", legend.title = "Grado histologico")
ggsurvplot(mama.km.ln2, xlab = "Tiempo (días)", censor = T,
ylab = "Supervivencia", title = "Curva de Supervivencia por invasión linfática", legend.title = "Grado histologico", risk.table= T)
#Modelo de riesgos proporcionales de Cox.
#Creación del modelo1: solo con variable ER
mama1 <- coxph(mama.surv ~ er, data = mama)
summary(mama1)
## Call:
## coxph(formula = mama.surv ~ er, data = mama)
##
## n= 869, number of events= 51
## (338 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## er1 -0.6510 0.5215 0.2814 -2.313 0.0207 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## er1 0.5215 1.917 0.3004 0.9054
##
## Concordance= 0.6 (se = 0.037 )
## Likelihood ratio test= 5.37 on 1 df, p=0.02
## Wald test = 5.35 on 1 df, p=0.02
## Score (logrank) test = 5.54 on 1 df, p=0.02
#Prueba de proporcionalidad (correlación)
RPropm1<- cox.zph(mama1)
RPropm1
## chisq df p
## er 1.72 1 0.19
## GLOBAL 1.72 1 0.19
ggcoxzph(RPropm1)
ggcoxdiagnostics(mama1, type = "schoenfeld")
ggcoxdiagnostics(mama1, type = "dfbetas", ox.scale = "observation.id", title = "Residuales dfbetas",
subtitle = "dfbetas vs id", caption = "Análisis residual")
#Creación del modelo1: solo con variable PR
mama2 <- coxph(mama.surv ~ pr, data = mama)
summary(mama2)
## Call:
## coxph(formula = mama.surv ~ pr, data = mama)
##
## n= 851, number of events= 51
## (356 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## pr1 -0.6397 0.5274 0.2868 -2.23 0.0257 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## pr1 0.5274 1.896 0.3006 0.9254
##
## Concordance= 0.595 (se = 0.036 )
## Likelihood ratio test= 5.12 on 1 df, p=0.02
## Wald test = 4.97 on 1 df, p=0.03
## Score (logrank) test = 5.15 on 1 df, p=0.02
#Prueba de proporcionalidad (correlación)
RPropm2<- cox.zph(mama2)
RPropm2
## chisq df p
## pr 1.4 1 0.24
## GLOBAL 1.4 1 0.24
ggcoxzph(RPropm2)
ggcoxdiagnostics(mama2, type = "schoenfeld")
ggcoxdiagnostics(mama2, type = "dfbetas", ox.scale = "observation.id", title = "Residuales dfbetas",
subtitle = "dfbetas vs id", caption = "Análisis residual")
#Creación del modelo1: solo con variable PR
mama3 <- coxph(mama.surv ~ age + histgrad +er + pr + ln_yesno +pathscat2, data = mama)
summary(mama3)
## Call:
## coxph(formula = mama.surv ~ age + histgrad + er + pr + ln_yesno +
## pathscat2, data = mama)
##
## n= 660, number of events= 40
## (547 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age -0.02011 0.98009 0.01373 -1.465 0.1429
## histgrad2 0.72926 2.07354 1.03483 0.705 0.4810
## histgrad3 0.89299 2.44243 1.05371 0.847 0.3967
## er1 -0.06001 0.94176 0.42742 -0.140 0.8883
## pr1 -0.47561 0.62150 0.41981 -1.133 0.2572
## ln_yesno1 0.78214 2.18614 0.33032 2.368 0.0179 *
## pathscat22 0.69719 2.00810 0.32718 2.131 0.0331 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 0.9801 1.0203 0.9541 1.007
## histgrad2 2.0735 0.4823 0.2728 15.760
## histgrad3 2.4424 0.4094 0.3097 19.264
## er1 0.9418 1.0618 0.4075 2.177
## pr1 0.6215 1.6090 0.2730 1.415
## ln_yesno1 2.1861 0.4574 1.1442 4.177
## pathscat22 2.0081 0.4980 1.0575 3.813
##
## Concordance= 0.702 (se = 0.049 )
## Likelihood ratio test= 24.18 on 7 df, p=0.001
## Wald test = 23.62 on 7 df, p=0.001
## Score (logrank) test = 26.07 on 7 df, p=5e-04
#Prueba de proporcionalidad (correlación)
RPropm3<- cox.zph(mama3)
RPropm3
## chisq df p
## age 0.1670 1 0.68
## histgrad 0.0628 2 0.97
## er 2.3231 1 0.13
## pr 1.9918 1 0.16
## ln_yesno 1.2726 1 0.26
## pathscat2 0.0505 1 0.82
## GLOBAL 6.3122 7 0.50
ggcoxzph(RPropm3)
ggcoxdiagnostics(mama3, type = "schoenfeld")
ggcoxdiagnostics(mama3, type = "dfbetas", ox.scale = "observation.id", title = "Residuales dfbetas",
subtitle = "dfbetas vs id", caption = "Análisis residual")
#---------
#marginalModelPlots(modelo)
#residualPlots(modelo)
#outlierTest(modelo)
#influenceIndexPlot(modelo)
#influencePlot(modelo)