#CARGAR PAQUETE

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ stringr 1.4.0
## ✔ tidyr   1.2.0     ✔ forcats 0.5.1
## ✔ readr   2.1.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(dlookr)
## 
## Attaching package: 'dlookr'
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## The following object is masked from 'package:base':
## 
##     transform
library(tidyverse)
library(ggplot2)
library(dlookr)
library(survival)
library(ggpubr)
library(survminer)
## 
## Attaching package: 'survminer'
## 
## The following object is masked from 'package:survival':
## 
##     myeloma
library(finalfit)

BASE DE DATOS PRUEBA

library(readxl)
covidmx <- read.csv("covidbmx.csv")

Exploratory data (EDA)

#EDA
library(funModeling) 
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:dlookr':
## 
##     describe
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## funModeling v.1.9.4 :)
## Examples and tutorials at livebook.datascienceheroes.com
##  / Now in Spanish: librovivodecienciadedatos.ai
library(tidyverse) 
library(Hmisc)

covieda <- covidmx

covieda <- covieda %>% drop_na()


freq(covieda)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
##      sex frequency percentage cumulative_perc
## 1   male      2450      68.59           68.59
## 2 female      1122      31.41          100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##        NEU frequency percentage cumulative_perc
## 1 Positive      2628      73.57           73.57
## 2 Negative       944      26.43          100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##         DM frequency percentage cumulative_perc
## 1 Negative      2886       80.8            80.8
## 2 Positive       686       19.2           100.0
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##       COPD frequency percentage cumulative_perc
## 1 Negative      3535      98.96           98.96
## 2 Positive        37       1.04          100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##       ASTH frequency percentage cumulative_perc
## 1 Negative      3458      96.81           96.81
## 2 Positive       114       3.19          100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##        IMM frequency percentage cumulative_perc
## 1 Negative      3415       95.6            95.6
## 2 Positive       157        4.4           100.0
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##         AH frequency percentage cumulative_perc
## 1 Negative      2894      81.02           81.02
## 2 Positive       678      18.98          100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##        CVD frequency percentage cumulative_perc
## 1 Negative      3499      97.96           97.96
## 2 Positive        73       2.04          100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##        OBE frequency percentage cumulative_perc
## 1 Negative      2344      65.62           65.62
## 2 Positive      1228      34.38          100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##        CKD frequency percentage cumulative_perc
## 1 Negative      3271      91.57           91.57
## 2 Positive       301       8.43          100.00
## [1] "Variables processed: sex, NEU, DM, COPD, ASTH, IMM, AH, CVD, OBE, CKD"

Stats

library(gtsummary)
covidmx %>% tbl_summary(by=status) %>% add_p() %>% add_overall()
## There was an error in 'add_p()/add_difference()' for variable 'time', p-value omitted:
## Error in wilcox.test.formula(as.numeric(time) ~ as.factor(status), data = structure(list(: grouping factor must have exactly 2 levels
## There was an error in 'add_p()/add_difference()' for variable 'COPD', p-value omitted:
## Error in stats::fisher.test(c("Negative", "Negative", "Negative", "Negative", : FEXACT error 40.
## Out of workspace.
## There was an error in 'add_p()/add_difference()' for variable 'IMM', p-value omitted:
## Error in stats::fisher.test(c("Negative", "Negative", "Negative", "Negative", : FEXACT error 40.
## Out of workspace.
## There was an error in 'add_p()/add_difference()' for variable 'CVD', p-value omitted:
## Error in stats::fisher.test(c("Negative", "Negative", "Negative", "Negative", : FEXACT error 40.
## Out of workspace.
## There was an error in 'add_p()/add_difference()' for variable 'CKD', p-value omitted:
## Error in stats::fisher.test(c("Negative", "Negative", "Negative", "Negative", : FEXACT error 40.
## Out of workspace.
## Warning: The `fmt_missing()` function is deprecated and will soon be removed
## * Use the `sub_missing()` function instead
Characteristic Overall, N = 1,048,5751 0, N = 1,045,0031 1, N = 3,5721 p-value2
sex <0.001
897,916 (86%) 897,916 (86%) 0 (0%)
female 73,550 (7.0%) 72,428 (6.9%) 1,122 (31%)
male 77,109 (7.4%) 74,659 (7.1%) 2,450 (69%)
Age 32 (27, 36) 32 (27, 36) 35 (31, 38) <0.001
Unknown 897,916 897,916 0
time 6 (2, 12) NA (NA, NA) 6 (2, 12)
Unknown 1,045,003 1,045,003 0
NEU <0.001
897,916 (86%) 897,916 (86%) 0 (0%)
Negative 137,313 (13%) 136,369 (13%) 944 (26%)
Positive 13,346 (1.3%) 10,718 (1.0%) 2,628 (74%)
DM <0.001
897,916 (86%) 897,916 (86%) 0 (0%)
Negative 144,952 (14%) 142,066 (14%) 2,886 (81%)
Positive 5,707 (0.5%) 5,021 (0.5%) 686 (19%)
COPD
897,916 (86%) 897,916 (86%) 0 (0%)
Negative 150,277 (14%) 146,742 (14%) 3,535 (99%)
Positive 382 (<0.1%) 345 (<0.1%) 37 (1.0%)
ASTH <0.001
897,916 (86%) 897,916 (86%) 0 (0%)
Negative 146,223 (14%) 142,765 (14%) 3,458 (97%)
Positive 4,436 (0.4%) 4,322 (0.4%) 114 (3.2%)
IMM
897,916 (86%) 897,916 (86%) 0 (0%)
Negative 149,598 (14%) 146,183 (14%) 3,415 (96%)
Positive 1,061 (0.1%) 904 (<0.1%) 157 (4.4%)
AH <0.001
897,916 (86%) 897,916 (86%) 0 (0%)
Negative 142,922 (14%) 140,028 (13%) 2,894 (81%)
Positive 7,737 (0.7%) 7,059 (0.7%) 678 (19%)
CVD
897,916 (86%) 897,916 (86%) 0 (0%)
Negative 149,671 (14%) 146,172 (14%) 3,499 (98%)
Positive 988 (<0.1%) 915 (<0.1%) 73 (2.0%)
OBE <0.001
897,916 (86%) 897,916 (86%) 0 (0%)
Negative 126,004 (12%) 123,660 (12%) 2,344 (66%)
Positive 24,655 (2.4%) 23,427 (2.2%) 1,228 (34%)
CKD
897,916 (86%) 897,916 (86%) 0 (0%)
Negative 149,228 (14%) 145,957 (14%) 3,271 (92%)
Positive 1,431 (0.1%) 1,130 (0.1%) 301 (8.4%)
1 n (%); Median (IQR)
2 Pearson's Chi-squared test; Wilcoxon rank sum test

Survival analyisis

#overall analysisi

covid<- covidmx
ggsurvplot(
  fit = survfit(Surv(time, status) ~ 1, data = covid), 
  xlab = "Days", 
  ylab = "Overall survival probability")

overall<-survfit(Surv(time, status) ~ 1, data = covid)

ggsurvall<-ggsurvplot(overall, data = covid, xlab = "Days", conf.int = TRUE,
           risk.table = TRUE,tables.height = 0.20, fontsize = 7,  legend.title = "Overall", xlim = c(0,28),break.time.by = 2,
           font.title = c(20),
           font.subtitle = c(20),
           font.caption = c(20),
           font.x = c(20),
           font.y = c(20),
           font.tickslab = c(20))

ggsurvall$plot

ggsurvall$plot <- ggsurvall$plot +  
  theme(legend.title = element_text(size = 15),legend.text = element_text(size = 15))

ggsurvall

#Compute survival curves of patients stratified by treatment assignments (rx)

SEX <- survfit(Surv(time, status) ~ sex, data=covid)
NEU <- survfit(Surv(time, status) ~ NEU, data = covid)
DM <- survfit( Surv(time, status) ~ DM, data = covid)
COPD <- survfit(Surv(time, status) ~ COPD , data = covid)
ASTH <- survfit(Surv(time, status) ~ ASTH, data = covid)
IMM <- survfit(Surv(time, status) ~ IMM , data = covid)
AH <- survfit(Surv(time, status) ~ AH , data = covid)
CVD <- survfit(Surv(time, status) ~ CVD , data = covid)
OBE <- survfit(Surv(time, status) ~ OBE , data = covid)
CKD <- survfit(Surv(time, status) ~ CKD , data = covid)




SEX
## Call: survfit(formula = Surv(time, status) ~ sex, data = covid)
## 
##    1045003 observations deleted due to missingness 
##               n events median 0.95LCL 0.95UCL
## sex=female 1122   1122      5       5       6
## sex=male   2450   2450      6       6       6
summary(NEU)$table
##              records n.max n.start events    rmean se(rmean) median 0.95LCL
## NEU=Negative     944   944     944    944 9.523305 0.3783774      6       6
## NEU=Positive    2628  2628    2628   2628 8.261416 0.1835962      6       5
##              0.95UCL
## NEU=Negative       7
## NEU=Positive       6
DM
## Call: survfit(formula = Surv(time, status) ~ DM, data = covid)
## 
##    1045003 observations deleted due to missingness 
##                n events median 0.95LCL 0.95UCL
## DM=Negative 2886   2886      6       6       7
## DM=Positive  686    686      4       4       5
COPD
## Call: survfit(formula = Surv(time, status) ~ COPD, data = covid)
## 
##    1045003 observations deleted due to missingness 
##                  n events median 0.95LCL 0.95UCL
## COPD=Negative 3535   3535      6       6       6
## COPD=Positive   37     37      5       3       8
ASTH
## Call: survfit(formula = Surv(time, status) ~ ASTH, data = covid)
## 
##    1045003 observations deleted due to missingness 
##                  n events median 0.95LCL 0.95UCL
## ASTH=Negative 3458   3458      6       6       6
## ASTH=Positive  114    114      6       5       7
IMM
## Call: survfit(formula = Surv(time, status) ~ IMM, data = covid)
## 
##    1045003 observations deleted due to missingness 
##                 n events median 0.95LCL 0.95UCL
## IMM=Negative 3415   3415      6       6       6
## IMM=Positive  157    157      5       4       7
AH
## Call: survfit(formula = Surv(time, status) ~ AH, data = covid)
## 
##    1045003 observations deleted due to missingness 
##                n events median 0.95LCL 0.95UCL
## AH=Negative 2894   2894      6       6       6
## AH=Positive  678    678      5       5       6
CVD
## Call: survfit(formula = Surv(time, status) ~ CVD, data = covid)
## 
##    1045003 observations deleted due to missingness 
##                 n events median 0.95LCL 0.95UCL
## CVD=Negative 3499   3499      6       6       6
## CVD=Positive   73     73      5       4       7
OBE
## Call: survfit(formula = Surv(time, status) ~ OBE, data = covid)
## 
##    1045003 observations deleted due to missingness 
##                 n events median 0.95LCL 0.95UCL
## OBE=Negative 2344   2344      6       6       7
## OBE=Positive 1228   1228      5       5       6
CKD
## Call: survfit(formula = Surv(time, status) ~ CKD, data = covid)
## 
##    1045003 observations deleted due to missingness 
##                 n events median 0.95LCL 0.95UCL
## CKD=Negative 3271   3271      6       6       6
## CKD=Positive  301    301      5       4       5

KM plots

library(ggthemes)

ggsurvneu <-ggsurvplot(NEU, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,
           risk.table = TRUE, tables.height = 0.20,fontsize = 7, break.time.by = 2, legend.title = "NEU", legend.labs = c("Negative", "Positive"),xlim = c(0,28),
           font.title = c(20),
           font.subtitle = c(20),
           font.caption = c(20),
           font.x = c(20),
           font.y = c(20),
           font.tickslab = c(20))

ggsurvneu$plot

ggsurvneu$plot <- ggsurvneu$plot + 
  theme(legend.title = element_text(size = 15),legend.text = element_text(size = 15))

ggsurvneu

ggsurvdm <-ggsurvplot(DM, data = covid, pval = TRUE, pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,
           risk.table = TRUE,tables.height = 0.20, fontsize = 7, break.time.by = 2, legend.title = "DM", legend.labs = c("Negative", "Positive"),xlim = c(0,28),
           font.title = c(20),
           font.subtitle = c(20),
           font.caption = c(20),
           font.x = c(20),
           font.y = c(20),
           font.tickslab = c(20))

ggsurvdm$plot

ggsurvdm$plot <- ggsurvdm$plot + 
  theme(legend.title = element_text(size = 15),legend.text = element_text(size = 15))

ggsurvdm

ggsurvplot(COPD, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,xlim = c(0,28),
           risk.table = TRUE,tables.height = 0.20, break.time.by = 2, )

ggsurvplot(ASTH, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,xlim = c(0,28),
           risk.table = TRUE,tables.height = 0.20, break.time.by = 2)

ggsurvplot(IMM, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,xlim = c(0,28),
           risk.table = TRUE, tables.height = 0.20, break.time.by = 2)

ggsurvplot(CVD, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,xlim = c(0,28),
           risk.table = TRUE, tables.height = 0.20, break.time.by = 2)

ggsurvplot(AH, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,xlim = c(0,28),
           risk.table = TRUE, tables.height = 0.20, break.time.by = 2)

ggsurvobe<-ggsurvplot(OBE, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,
           risk.table = TRUE,tables.height = 0.20,fontsize = 7, break.time.by = 20,legend.title = "OBE", legend.labs = c("Negative", "Positive"),
           font.title = c(20),
           font.subtitle = c(20),
           font.caption = c(20),
           font.x = c(20),
           font.y = c(20),
           font.tickslab = c(20))

ggsurvobe$plot

ggsurvobe$plot <- ggsurvobe$plot + 
  theme(legend.title = element_text(size = 15),legend.text = element_text(size = 15))

ggsurvobe

ggsurvplot(CKD, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,
           risk.table = TRUE,tables.height = 0.20, break.time.by = 20)

KM grid plots

require("survminer")

splots <- list()

#ajustes a la gr?fica de sobrevida y tabla:
#https://cran.r-project.org/web/packages/survminer/vignettes/Playing_with_fonts_and_texts.html

# Arrange multiple ggsurvplots and print the output
#http://rpkgs.datanovia.com/survminer/reference/arrange_ggsurvplots.html

splots[[1]]<- ggsurvplot(NEU, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[2]] <- ggsurvplot(DM, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[3]] <- ggsurvplot(COPD, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[4]] <-ggsurvplot(ASTH, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)



# Arrange multiple ggsurvplots and print the output
fig1<-arrange_ggsurvplots(splots, print = T,
                    ncol = 2, nrow =2 , risk.table.height = 0.4)

fig1

splots[[1]] <-ggsurvplot(IMM, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[2]] <-ggsurvplot(CVD, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[3]] <-ggsurvplot(OBE, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[4]] <-ggsurvplot(CKD, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)

fig2<-arrange_ggsurvplots(splots, print = T,
                    ncol = 2, nrow =2 , risk.table.height = 0.4)


fig2

COX-REGRESSION & FOREST PLOT ANALYSIS

model.citok 
## Call:
## coxph(formula = Surv(time, status) ~ +NEU + DM + COPD + ASTH + 
##     IMM + AH + CVD + OBE + CKD, data = covieda)
## 
##                   coef exp(coef)  se(coef)      z        p
## NEUPositive   0.115586  1.122531  0.038221  3.024 0.002494
## DMPositive    0.161706  1.175515  0.043884  3.685 0.000229
## COPDPositive  0.158116  1.171302  0.167251  0.945 0.344465
## ASTHPositive -0.058802  0.942893  0.095500 -0.616 0.538073
## IMMPositive   0.002329  1.002331  0.083786  0.028 0.977827
## AHPositive    0.042957  1.043893  0.046887  0.916 0.359569
## CVDPositive   0.062836  1.064852  0.120818  0.520 0.603004
## OBEPositive   0.118700  1.126032  0.036055  3.292 0.000994
## CKDPositive   0.038088  1.038823  0.066635  0.572 0.567597
## 
## Likelihood ratio test=43.43  on 9 df, p=1.797e-06
## n= 3572, number of events= 3572
require("survival")
require(ggplot2)
require(ggpubr)

ggforest(model.citok, data=covieda)