#CARGAR PAQUETE
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ stringr 1.4.0
## ✔ tidyr 1.2.0 ✔ forcats 0.5.1
## ✔ readr 2.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(dlookr)
##
## Attaching package: 'dlookr'
##
## The following object is masked from 'package:tidyr':
##
## extract
##
## The following object is masked from 'package:base':
##
## transform
library(tidyverse)
library(ggplot2)
library(dlookr)
library(survival)
library(ggpubr)
library(survminer)
##
## Attaching package: 'survminer'
##
## The following object is masked from 'package:survival':
##
## myeloma
library(finalfit)
BASE DE DATOS PRUEBA
library(readxl)
covidmx <- read.csv("covidbmx.csv")
#EDA
library(funModeling)
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:dlookr':
##
## describe
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
## funModeling v.1.9.4 :)
## Examples and tutorials at livebook.datascienceheroes.com
## / Now in Spanish: librovivodecienciadedatos.ai
library(tidyverse)
library(Hmisc)
covieda <- covidmx
covieda <- covieda %>% drop_na()
freq(covieda)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## sex frequency percentage cumulative_perc
## 1 male 2450 68.59 68.59
## 2 female 1122 31.41 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## NEU frequency percentage cumulative_perc
## 1 Positive 2628 73.57 73.57
## 2 Negative 944 26.43 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## DM frequency percentage cumulative_perc
## 1 Negative 2886 80.8 80.8
## 2 Positive 686 19.2 100.0
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## COPD frequency percentage cumulative_perc
## 1 Negative 3535 98.96 98.96
## 2 Positive 37 1.04 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## ASTH frequency percentage cumulative_perc
## 1 Negative 3458 96.81 96.81
## 2 Positive 114 3.19 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## IMM frequency percentage cumulative_perc
## 1 Negative 3415 95.6 95.6
## 2 Positive 157 4.4 100.0
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## AH frequency percentage cumulative_perc
## 1 Negative 2894 81.02 81.02
## 2 Positive 678 18.98 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## CVD frequency percentage cumulative_perc
## 1 Negative 3499 97.96 97.96
## 2 Positive 73 2.04 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## OBE frequency percentage cumulative_perc
## 1 Negative 2344 65.62 65.62
## 2 Positive 1228 34.38 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## CKD frequency percentage cumulative_perc
## 1 Negative 3271 91.57 91.57
## 2 Positive 301 8.43 100.00
## [1] "Variables processed: sex, NEU, DM, COPD, ASTH, IMM, AH, CVD, OBE, CKD"
library(gtsummary)
covidmx %>% tbl_summary(by=status) %>% add_p() %>% add_overall()
## There was an error in 'add_p()/add_difference()' for variable 'time', p-value omitted:
## Error in wilcox.test.formula(as.numeric(time) ~ as.factor(status), data = structure(list(: grouping factor must have exactly 2 levels
## There was an error in 'add_p()/add_difference()' for variable 'COPD', p-value omitted:
## Error in stats::fisher.test(c("Negative", "Negative", "Negative", "Negative", : FEXACT error 40.
## Out of workspace.
## There was an error in 'add_p()/add_difference()' for variable 'IMM', p-value omitted:
## Error in stats::fisher.test(c("Negative", "Negative", "Negative", "Negative", : FEXACT error 40.
## Out of workspace.
## There was an error in 'add_p()/add_difference()' for variable 'CVD', p-value omitted:
## Error in stats::fisher.test(c("Negative", "Negative", "Negative", "Negative", : FEXACT error 40.
## Out of workspace.
## There was an error in 'add_p()/add_difference()' for variable 'CKD', p-value omitted:
## Error in stats::fisher.test(c("Negative", "Negative", "Negative", "Negative", : FEXACT error 40.
## Out of workspace.
## Warning: The `fmt_missing()` function is deprecated and will soon be removed
## * Use the `sub_missing()` function instead
| Characteristic | Overall, N = 1,048,5751 | 0, N = 1,045,0031 | 1, N = 3,5721 | p-value2 |
|---|---|---|---|---|
| sex | <0.001 | |||
| 897,916 (86%) | 897,916 (86%) | 0 (0%) | ||
| female | 73,550 (7.0%) | 72,428 (6.9%) | 1,122 (31%) | |
| male | 77,109 (7.4%) | 74,659 (7.1%) | 2,450 (69%) | |
| Age | 32 (27, 36) | 32 (27, 36) | 35 (31, 38) | <0.001 |
| Unknown | 897,916 | 897,916 | 0 | |
| time | 6 (2, 12) | NA (NA, NA) | 6 (2, 12) | |
| Unknown | 1,045,003 | 1,045,003 | 0 | |
| NEU | <0.001 | |||
| 897,916 (86%) | 897,916 (86%) | 0 (0%) | ||
| Negative | 137,313 (13%) | 136,369 (13%) | 944 (26%) | |
| Positive | 13,346 (1.3%) | 10,718 (1.0%) | 2,628 (74%) | |
| DM | <0.001 | |||
| 897,916 (86%) | 897,916 (86%) | 0 (0%) | ||
| Negative | 144,952 (14%) | 142,066 (14%) | 2,886 (81%) | |
| Positive | 5,707 (0.5%) | 5,021 (0.5%) | 686 (19%) | |
| COPD | ||||
| 897,916 (86%) | 897,916 (86%) | 0 (0%) | ||
| Negative | 150,277 (14%) | 146,742 (14%) | 3,535 (99%) | |
| Positive | 382 (<0.1%) | 345 (<0.1%) | 37 (1.0%) | |
| ASTH | <0.001 | |||
| 897,916 (86%) | 897,916 (86%) | 0 (0%) | ||
| Negative | 146,223 (14%) | 142,765 (14%) | 3,458 (97%) | |
| Positive | 4,436 (0.4%) | 4,322 (0.4%) | 114 (3.2%) | |
| IMM | ||||
| 897,916 (86%) | 897,916 (86%) | 0 (0%) | ||
| Negative | 149,598 (14%) | 146,183 (14%) | 3,415 (96%) | |
| Positive | 1,061 (0.1%) | 904 (<0.1%) | 157 (4.4%) | |
| AH | <0.001 | |||
| 897,916 (86%) | 897,916 (86%) | 0 (0%) | ||
| Negative | 142,922 (14%) | 140,028 (13%) | 2,894 (81%) | |
| Positive | 7,737 (0.7%) | 7,059 (0.7%) | 678 (19%) | |
| CVD | ||||
| 897,916 (86%) | 897,916 (86%) | 0 (0%) | ||
| Negative | 149,671 (14%) | 146,172 (14%) | 3,499 (98%) | |
| Positive | 988 (<0.1%) | 915 (<0.1%) | 73 (2.0%) | |
| OBE | <0.001 | |||
| 897,916 (86%) | 897,916 (86%) | 0 (0%) | ||
| Negative | 126,004 (12%) | 123,660 (12%) | 2,344 (66%) | |
| Positive | 24,655 (2.4%) | 23,427 (2.2%) | 1,228 (34%) | |
| CKD | ||||
| 897,916 (86%) | 897,916 (86%) | 0 (0%) | ||
| Negative | 149,228 (14%) | 145,957 (14%) | 3,271 (92%) | |
| Positive | 1,431 (0.1%) | 1,130 (0.1%) | 301 (8.4%) | |
| 1 n (%); Median (IQR) | ||||
| 2 Pearson's Chi-squared test; Wilcoxon rank sum test | ||||
#overall analysisi
covid<- covidmx
ggsurvplot(
fit = survfit(Surv(time, status) ~ 1, data = covid),
xlab = "Days",
ylab = "Overall survival probability")
overall<-survfit(Surv(time, status) ~ 1, data = covid)
ggsurvall<-ggsurvplot(overall, data = covid, xlab = "Days", conf.int = TRUE,
risk.table = TRUE,tables.height = 0.20, fontsize = 7, legend.title = "Overall", xlim = c(0,28),break.time.by = 2,
font.title = c(20),
font.subtitle = c(20),
font.caption = c(20),
font.x = c(20),
font.y = c(20),
font.tickslab = c(20))
ggsurvall$plot
ggsurvall$plot <- ggsurvall$plot +
theme(legend.title = element_text(size = 15),legend.text = element_text(size = 15))
ggsurvall
#Compute survival curves of patients stratified by treatment assignments (rx)
SEX <- survfit(Surv(time, status) ~ sex, data=covid)
NEU <- survfit(Surv(time, status) ~ NEU, data = covid)
DM <- survfit( Surv(time, status) ~ DM, data = covid)
COPD <- survfit(Surv(time, status) ~ COPD , data = covid)
ASTH <- survfit(Surv(time, status) ~ ASTH, data = covid)
IMM <- survfit(Surv(time, status) ~ IMM , data = covid)
AH <- survfit(Surv(time, status) ~ AH , data = covid)
CVD <- survfit(Surv(time, status) ~ CVD , data = covid)
OBE <- survfit(Surv(time, status) ~ OBE , data = covid)
CKD <- survfit(Surv(time, status) ~ CKD , data = covid)
SEX
## Call: survfit(formula = Surv(time, status) ~ sex, data = covid)
##
## 1045003 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## sex=female 1122 1122 5 5 6
## sex=male 2450 2450 6 6 6
summary(NEU)$table
## records n.max n.start events rmean se(rmean) median 0.95LCL
## NEU=Negative 944 944 944 944 9.523305 0.3783774 6 6
## NEU=Positive 2628 2628 2628 2628 8.261416 0.1835962 6 5
## 0.95UCL
## NEU=Negative 7
## NEU=Positive 6
DM
## Call: survfit(formula = Surv(time, status) ~ DM, data = covid)
##
## 1045003 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## DM=Negative 2886 2886 6 6 7
## DM=Positive 686 686 4 4 5
COPD
## Call: survfit(formula = Surv(time, status) ~ COPD, data = covid)
##
## 1045003 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## COPD=Negative 3535 3535 6 6 6
## COPD=Positive 37 37 5 3 8
ASTH
## Call: survfit(formula = Surv(time, status) ~ ASTH, data = covid)
##
## 1045003 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## ASTH=Negative 3458 3458 6 6 6
## ASTH=Positive 114 114 6 5 7
IMM
## Call: survfit(formula = Surv(time, status) ~ IMM, data = covid)
##
## 1045003 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## IMM=Negative 3415 3415 6 6 6
## IMM=Positive 157 157 5 4 7
AH
## Call: survfit(formula = Surv(time, status) ~ AH, data = covid)
##
## 1045003 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## AH=Negative 2894 2894 6 6 6
## AH=Positive 678 678 5 5 6
CVD
## Call: survfit(formula = Surv(time, status) ~ CVD, data = covid)
##
## 1045003 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## CVD=Negative 3499 3499 6 6 6
## CVD=Positive 73 73 5 4 7
OBE
## Call: survfit(formula = Surv(time, status) ~ OBE, data = covid)
##
## 1045003 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## OBE=Negative 2344 2344 6 6 7
## OBE=Positive 1228 1228 5 5 6
CKD
## Call: survfit(formula = Surv(time, status) ~ CKD, data = covid)
##
## 1045003 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## CKD=Negative 3271 3271 6 6 6
## CKD=Positive 301 301 5 4 5
library(ggthemes)
ggsurvneu <-ggsurvplot(NEU, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,
risk.table = TRUE, tables.height = 0.20,fontsize = 7, break.time.by = 2, legend.title = "NEU", legend.labs = c("Negative", "Positive"),xlim = c(0,28),
font.title = c(20),
font.subtitle = c(20),
font.caption = c(20),
font.x = c(20),
font.y = c(20),
font.tickslab = c(20))
ggsurvneu$plot
ggsurvneu$plot <- ggsurvneu$plot +
theme(legend.title = element_text(size = 15),legend.text = element_text(size = 15))
ggsurvneu
ggsurvdm <-ggsurvplot(DM, data = covid, pval = TRUE, pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,
risk.table = TRUE,tables.height = 0.20, fontsize = 7, break.time.by = 2, legend.title = "DM", legend.labs = c("Negative", "Positive"),xlim = c(0,28),
font.title = c(20),
font.subtitle = c(20),
font.caption = c(20),
font.x = c(20),
font.y = c(20),
font.tickslab = c(20))
ggsurvdm$plot
ggsurvdm$plot <- ggsurvdm$plot +
theme(legend.title = element_text(size = 15),legend.text = element_text(size = 15))
ggsurvdm
ggsurvplot(COPD, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,xlim = c(0,28),
risk.table = TRUE,tables.height = 0.20, break.time.by = 2, )
ggsurvplot(ASTH, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,xlim = c(0,28),
risk.table = TRUE,tables.height = 0.20, break.time.by = 2)
ggsurvplot(IMM, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,xlim = c(0,28),
risk.table = TRUE, tables.height = 0.20, break.time.by = 2)
ggsurvplot(CVD, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,xlim = c(0,28),
risk.table = TRUE, tables.height = 0.20, break.time.by = 2)
ggsurvplot(AH, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,xlim = c(0,28),
risk.table = TRUE, tables.height = 0.20, break.time.by = 2)
ggsurvobe<-ggsurvplot(OBE, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,
risk.table = TRUE,tables.height = 0.20,fontsize = 7, break.time.by = 20,legend.title = "OBE", legend.labs = c("Negative", "Positive"),
font.title = c(20),
font.subtitle = c(20),
font.caption = c(20),
font.x = c(20),
font.y = c(20),
font.tickslab = c(20))
ggsurvobe$plot
ggsurvobe$plot <- ggsurvobe$plot +
theme(legend.title = element_text(size = 15),legend.text = element_text(size = 15))
ggsurvobe
ggsurvplot(CKD, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,
risk.table = TRUE,tables.height = 0.20, break.time.by = 20)
require("survminer")
splots <- list()
#ajustes a la gr?fica de sobrevida y tabla:
#https://cran.r-project.org/web/packages/survminer/vignettes/Playing_with_fonts_and_texts.html
# Arrange multiple ggsurvplots and print the output
#http://rpkgs.datanovia.com/survminer/reference/arrange_ggsurvplots.html
splots[[1]]<- ggsurvplot(NEU, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[2]] <- ggsurvplot(DM, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[3]] <- ggsurvplot(COPD, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[4]] <-ggsurvplot(ASTH, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
# Arrange multiple ggsurvplots and print the output
fig1<-arrange_ggsurvplots(splots, print = T,
ncol = 2, nrow =2 , risk.table.height = 0.4)
fig1
splots[[1]] <-ggsurvplot(IMM, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[2]] <-ggsurvplot(CVD, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[3]] <-ggsurvplot(OBE, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[4]] <-ggsurvplot(CKD, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
fig2<-arrange_ggsurvplots(splots, print = T,
ncol = 2, nrow =2 , risk.table.height = 0.4)
fig2
model.citok
## Call:
## coxph(formula = Surv(time, status) ~ +NEU + DM + COPD + ASTH +
## IMM + AH + CVD + OBE + CKD, data = covieda)
##
## coef exp(coef) se(coef) z p
## NEUPositive 0.115586 1.122531 0.038221 3.024 0.002494
## DMPositive 0.161706 1.175515 0.043884 3.685 0.000229
## COPDPositive 0.158116 1.171302 0.167251 0.945 0.344465
## ASTHPositive -0.058802 0.942893 0.095500 -0.616 0.538073
## IMMPositive 0.002329 1.002331 0.083786 0.028 0.977827
## AHPositive 0.042957 1.043893 0.046887 0.916 0.359569
## CVDPositive 0.062836 1.064852 0.120818 0.520 0.603004
## OBEPositive 0.118700 1.126032 0.036055 3.292 0.000994
## CKDPositive 0.038088 1.038823 0.066635 0.572 0.567597
##
## Likelihood ratio test=43.43 on 9 df, p=1.797e-06
## n= 3572, number of events= 3572
require("survival")
require(ggplot2)
require(ggpubr)
ggforest(model.citok, data=covieda)