#CARGAR PAQUETE

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyverse)

## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──

## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ stringr 1.4.0
## ✔ tidyr   1.2.0     ✔ forcats 0.5.1
## ✔ readr   2.1.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(dlookr)

## 
## Attaching package: 'dlookr'
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## The following object is masked from 'package:base':
## 
##     transform

library(tidyverse)
library(ggplot2)
library(dlookr)
library(survival)
library(ggpubr)
library(survminer)

## 
## Attaching package: 'survminer'
## 
## The following object is masked from 'package:survival':
## 
##     myeloma

library(finalfit)

BASE DE DATOS PRUEBA

library(readxl)
covidmx <- read.csv("covidbmx.csv")

Exploratory data (EDA)

#EDA
library(funModeling)

## Loading required package: Hmisc

## Loading required package: lattice

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following object is masked from 'package:dlookr':
## 
##     describe

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

## funModeling v.1.9.4 :)
## Examples and tutorials at livebook.datascienceheroes.com
##  / Now in Spanish: librovivodecienciadedatos.ai

library(tidyverse) 
library(Hmisc)

covieda <- covidmx

covieda <- covieda %>% drop_na()


freq(covieda)

## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##      sex frequency percentage cumulative_perc
## 1   male      2450      68.59           68.59
## 2 female      1122      31.41          100.00

## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##        NEU frequency percentage cumulative_perc
## 1 Positive      2628      73.57           73.57
## 2 Negative       944      26.43          100.00

## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##         DM frequency percentage cumulative_perc
## 1 Negative      2886       80.8            80.8
## 2 Positive       686       19.2           100.0

## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##       COPD frequency percentage cumulative_perc
## 1 Negative      3535      98.96           98.96
## 2 Positive        37       1.04          100.00

## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##       ASTH frequency percentage cumulative_perc
## 1 Negative      3458      96.81           96.81
## 2 Positive       114       3.19          100.00

## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##        IMM frequency percentage cumulative_perc
## 1 Negative      3415       95.6            95.6
## 2 Positive       157        4.4           100.0

## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##         AH frequency percentage cumulative_perc
## 1 Negative      2894      81.02           81.02
## 2 Positive       678      18.98          100.00

## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##        CVD frequency percentage cumulative_perc
## 1 Negative      3499      97.96           97.96
## 2 Positive        73       2.04          100.00

## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##        OBE frequency percentage cumulative_perc
## 1 Negative      2344      65.62           65.62
## 2 Positive      1228      34.38          100.00

## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##        CKD frequency percentage cumulative_perc
## 1 Negative      3271      91.57           91.57
## 2 Positive       301       8.43          100.00

## [1] "Variables processed: sex, NEU, DM, COPD, ASTH, IMM, AH, CVD, OBE, CKD"

Stats

library(gtsummary)
covidmx %>% tbl_summary(by=status) %>% add_p() %>% add_overall()

## There was an error in 'add_p()/add_difference()' for variable 'time', p-value omitted:
## Error in wilcox.test.formula(as.numeric(time) ~ as.factor(status), data = structure(list(: grouping factor must have exactly 2 levels

## There was an error in 'add_p()/add_difference()' for variable 'COPD', p-value omitted:
## Error in stats::fisher.test(c("Negative", "Negative", "Negative", "Negative", : FEXACT error 40.
## Out of workspace.

## There was an error in 'add_p()/add_difference()' for variable 'IMM', p-value omitted:
## Error in stats::fisher.test(c("Negative", "Negative", "Negative", "Negative", : FEXACT error 40.
## Out of workspace.

## There was an error in 'add_p()/add_difference()' for variable 'CVD', p-value omitted:
## Error in stats::fisher.test(c("Negative", "Negative", "Negative", "Negative", : FEXACT error 40.
## Out of workspace.

## There was an error in 'add_p()/add_difference()' for variable 'CKD', p-value omitted:
## Error in stats::fisher.test(c("Negative", "Negative", "Negative", "Negative", : FEXACT error 40.
## Out of workspace.

## Warning: The `fmt_missing()` function is deprecated and will soon be removed
## * Use the `sub_missing()` function instead

Characteristic	Overall, N = 1,048,575¹	0, N = 1,045,003¹	1, N = 3,572¹	p-value²
sex				<0.001
	897,916 (86%)	897,916 (86%)	0 (0%)
female	73,550 (7.0%)	72,428 (6.9%)	1,122 (31%)
male	77,109 (7.4%)	74,659 (7.1%)	2,450 (69%)
Age	32 (27, 36)	32 (27, 36)	35 (31, 38)	<0.001
Unknown	897,916	897,916	0
time	6 (2, 12)	NA (NA, NA)	6 (2, 12)
Unknown	1,045,003	1,045,003	0
NEU				<0.001
	897,916 (86%)	897,916 (86%)	0 (0%)
Negative	137,313 (13%)	136,369 (13%)	944 (26%)
Positive	13,346 (1.3%)	10,718 (1.0%)	2,628 (74%)
DM				<0.001
	897,916 (86%)	897,916 (86%)	0 (0%)
Negative	144,952 (14%)	142,066 (14%)	2,886 (81%)
Positive	5,707 (0.5%)	5,021 (0.5%)	686 (19%)
COPD
	897,916 (86%)	897,916 (86%)	0 (0%)
Negative	150,277 (14%)	146,742 (14%)	3,535 (99%)
Positive	382 (<0.1%)	345 (<0.1%)	37 (1.0%)
ASTH				<0.001
	897,916 (86%)	897,916 (86%)	0 (0%)
Negative	146,223 (14%)	142,765 (14%)	3,458 (97%)
Positive	4,436 (0.4%)	4,322 (0.4%)	114 (3.2%)
IMM
	897,916 (86%)	897,916 (86%)	0 (0%)
Negative	149,598 (14%)	146,183 (14%)	3,415 (96%)
Positive	1,061 (0.1%)	904 (<0.1%)	157 (4.4%)
AH				<0.001
	897,916 (86%)	897,916 (86%)	0 (0%)
Negative	142,922 (14%)	140,028 (13%)	2,894 (81%)
Positive	7,737 (0.7%)	7,059 (0.7%)	678 (19%)
CVD
	897,916 (86%)	897,916 (86%)	0 (0%)
Negative	149,671 (14%)	146,172 (14%)	3,499 (98%)
Positive	988 (<0.1%)	915 (<0.1%)	73 (2.0%)
OBE				<0.001
	897,916 (86%)	897,916 (86%)	0 (0%)
Negative	126,004 (12%)	123,660 (12%)	2,344 (66%)
Positive	24,655 (2.4%)	23,427 (2.2%)	1,228 (34%)
CKD
	897,916 (86%)	897,916 (86%)	0 (0%)
Negative	149,228 (14%)	145,957 (14%)	3,271 (92%)
Positive	1,431 (0.1%)	1,130 (0.1%)	301 (8.4%)
¹ n (%); Median (IQR)
² Pearson's Chi-squared test; Wilcoxon rank sum test

Survival analyisis

#overall analysisi

covid<- covidmx
ggsurvplot(
  fit = survfit(Surv(time, status) ~ 1, data = covid), 
  xlab = "Days", 
  ylab = "Overall survival probability")

overall<-survfit(Surv(time, status) ~ 1, data = covid)

ggsurvall<-ggsurvplot(overall, data = covid, xlab = "Days", conf.int = TRUE,
           risk.table = TRUE,tables.height = 0.20, fontsize = 7,  legend.title = "Overall", xlim = c(0,28),break.time.by = 2,
           font.title = c(20),
           font.subtitle = c(20),
           font.caption = c(20),
           font.x = c(20),
           font.y = c(20),
           font.tickslab = c(20))

ggsurvall$plot

ggsurvall$plot <- ggsurvall$plot +  
  theme(legend.title = element_text(size = 15),legend.text = element_text(size = 15))

ggsurvall

#Compute survival curves of patients stratified by treatment assignments (rx)

SEX <- survfit(Surv(time, status) ~ sex, data=covid)
NEU <- survfit(Surv(time, status) ~ NEU, data = covid)
DM <- survfit( Surv(time, status) ~ DM, data = covid)
COPD <- survfit(Surv(time, status) ~ COPD , data = covid)
ASTH <- survfit(Surv(time, status) ~ ASTH, data = covid)
IMM <- survfit(Surv(time, status) ~ IMM , data = covid)
AH <- survfit(Surv(time, status) ~ AH , data = covid)
CVD <- survfit(Surv(time, status) ~ CVD , data = covid)
OBE <- survfit(Surv(time, status) ~ OBE , data = covid)
CKD <- survfit(Surv(time, status) ~ CKD , data = covid)




SEX

## Call: survfit(formula = Surv(time, status) ~ sex, data = covid)
## 
##    1045003 observations deleted due to missingness 
##               n events median 0.95LCL 0.95UCL
## sex=female 1122   1122      5       5       6
## sex=male   2450   2450      6       6       6

summary(NEU)$table

##              records n.max n.start events    rmean se(rmean) median 0.95LCL
## NEU=Negative     944   944     944    944 9.523305 0.3783774      6       6
## NEU=Positive    2628  2628    2628   2628 8.261416 0.1835962      6       5
##              0.95UCL
## NEU=Negative       7
## NEU=Positive       6

DM

## Call: survfit(formula = Surv(time, status) ~ DM, data = covid)
## 
##    1045003 observations deleted due to missingness 
##                n events median 0.95LCL 0.95UCL
## DM=Negative 2886   2886      6       6       7
## DM=Positive  686    686      4       4       5

COPD

## Call: survfit(formula = Surv(time, status) ~ COPD, data = covid)
## 
##    1045003 observations deleted due to missingness 
##                  n events median 0.95LCL 0.95UCL
## COPD=Negative 3535   3535      6       6       6
## COPD=Positive   37     37      5       3       8

ASTH

## Call: survfit(formula = Surv(time, status) ~ ASTH, data = covid)
## 
##    1045003 observations deleted due to missingness 
##                  n events median 0.95LCL 0.95UCL
## ASTH=Negative 3458   3458      6       6       6
## ASTH=Positive  114    114      6       5       7

IMM

## Call: survfit(formula = Surv(time, status) ~ IMM, data = covid)
## 
##    1045003 observations deleted due to missingness 
##                 n events median 0.95LCL 0.95UCL
## IMM=Negative 3415   3415      6       6       6
## IMM=Positive  157    157      5       4       7

AH

## Call: survfit(formula = Surv(time, status) ~ AH, data = covid)
## 
##    1045003 observations deleted due to missingness 
##                n events median 0.95LCL 0.95UCL
## AH=Negative 2894   2894      6       6       6
## AH=Positive  678    678      5       5       6

CVD

## Call: survfit(formula = Surv(time, status) ~ CVD, data = covid)
## 
##    1045003 observations deleted due to missingness 
##                 n events median 0.95LCL 0.95UCL
## CVD=Negative 3499   3499      6       6       6
## CVD=Positive   73     73      5       4       7

OBE

## Call: survfit(formula = Surv(time, status) ~ OBE, data = covid)
## 
##    1045003 observations deleted due to missingness 
##                 n events median 0.95LCL 0.95UCL
## OBE=Negative 2344   2344      6       6       7
## OBE=Positive 1228   1228      5       5       6

CKD

## Call: survfit(formula = Surv(time, status) ~ CKD, data = covid)
## 
##    1045003 observations deleted due to missingness 
##                 n events median 0.95LCL 0.95UCL
## CKD=Negative 3271   3271      6       6       6
## CKD=Positive  301    301      5       4       5

KM plots

library(ggthemes)

ggsurvneu <-ggsurvplot(NEU, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,
           risk.table = TRUE, tables.height = 0.20,fontsize = 7, break.time.by = 2, legend.title = "NEU", legend.labs = c("Negative", "Positive"),xlim = c(0,28),
           font.title = c(20),
           font.subtitle = c(20),
           font.caption = c(20),
           font.x = c(20),
           font.y = c(20),
           font.tickslab = c(20))

ggsurvneu$plot

ggsurvneu$plot <- ggsurvneu$plot + 
  theme(legend.title = element_text(size = 15),legend.text = element_text(size = 15))

ggsurvneu

ggsurvdm <-ggsurvplot(DM, data = covid, pval = TRUE, pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,
           risk.table = TRUE,tables.height = 0.20, fontsize = 7, break.time.by = 2, legend.title = "DM", legend.labs = c("Negative", "Positive"),xlim = c(0,28),
           font.title = c(20),
           font.subtitle = c(20),
           font.caption = c(20),
           font.x = c(20),
           font.y = c(20),
           font.tickslab = c(20))

ggsurvdm$plot

ggsurvdm$plot <- ggsurvdm$plot + 
  theme(legend.title = element_text(size = 15),legend.text = element_text(size = 15))

ggsurvdm

ggsurvplot(COPD, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,xlim = c(0,28),
           risk.table = TRUE,tables.height = 0.20, break.time.by = 2, )

ggsurvplot(ASTH, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,xlim = c(0,28),
           risk.table = TRUE,tables.height = 0.20, break.time.by = 2)

ggsurvplot(IMM, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,xlim = c(0,28),
           risk.table = TRUE, tables.height = 0.20, break.time.by = 2)

ggsurvplot(CVD, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,xlim = c(0,28),
           risk.table = TRUE, tables.height = 0.20, break.time.by = 2)

ggsurvplot(AH, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,xlim = c(0,28),
           risk.table = TRUE, tables.height = 0.20, break.time.by = 2)

ggsurvobe<-ggsurvplot(OBE, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,
           risk.table = TRUE,tables.height = 0.20,fontsize = 7, break.time.by = 20,legend.title = "OBE", legend.labs = c("Negative", "Positive"),
           font.title = c(20),
           font.subtitle = c(20),
           font.caption = c(20),
           font.x = c(20),
           font.y = c(20),
           font.tickslab = c(20))

ggsurvobe$plot

ggsurvobe$plot <- ggsurvobe$plot + 
  theme(legend.title = element_text(size = 15),legend.text = element_text(size = 15))

ggsurvobe

ggsurvplot(CKD, data = covid, pval = TRUE,pval.coord = c(140, 0.1),xlab = "Days", conf.int = TRUE,
           risk.table = TRUE,tables.height = 0.20, break.time.by = 20)

KM grid plots

require("survminer")

splots <- list()

#ajustes a la gr?fica de sobrevida y tabla:
#https://cran.r-project.org/web/packages/survminer/vignettes/Playing_with_fonts_and_texts.html

# Arrange multiple ggsurvplots and print the output
#http://rpkgs.datanovia.com/survminer/reference/arrange_ggsurvplots.html

splots[[1]]<- ggsurvplot(NEU, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[2]] <- ggsurvplot(DM, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[3]] <- ggsurvplot(COPD, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[4]] <-ggsurvplot(ASTH, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)



# Arrange multiple ggsurvplots and print the output
fig1<-arrange_ggsurvplots(splots, print = T,
                    ncol = 2, nrow =2 , risk.table.height = 0.4)

fig1

splots[[1]] <-ggsurvplot(IMM, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[2]] <-ggsurvplot(CVD, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[3]] <-ggsurvplot(OBE, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)
splots[[4]] <-ggsurvplot(CKD, data = covid, pval = TRUE,pval.coord = c(0, 0.1),xlab = "Days",xlim = c(0,28),break.time.by =2)

fig2<-arrange_ggsurvplots(splots, print = T,
                    ncol = 2, nrow =2 , risk.table.height = 0.4)


fig2

COX-REGRESSION & FOREST PLOT ANALYSIS

model.citok

## Call:
## coxph(formula = Surv(time, status) ~ +NEU + DM + COPD + ASTH + 
##     IMM + AH + CVD + OBE + CKD, data = covieda)
## 
##                   coef exp(coef)  se(coef)      z        p
## NEUPositive   0.115586  1.122531  0.038221  3.024 0.002494
## DMPositive    0.161706  1.175515  0.043884  3.685 0.000229
## COPDPositive  0.158116  1.171302  0.167251  0.945 0.344465
## ASTHPositive -0.058802  0.942893  0.095500 -0.616 0.538073
## IMMPositive   0.002329  1.002331  0.083786  0.028 0.977827
## AHPositive    0.042957  1.043893  0.046887  0.916 0.359569
## CVDPositive   0.062836  1.064852  0.120818  0.520 0.603004
## OBEPositive   0.118700  1.126032  0.036055  3.292 0.000994
## CKDPositive   0.038088  1.038823  0.066635  0.572 0.567597
## 
## Likelihood ratio test=43.43  on 9 df, p=1.797e-06
## n= 3572, number of events= 3572

require("survival")
require(ggplot2)
require(ggpubr)

ggforest(model.citok, data=covieda)

COVID-19MX Survival Analysis Model

Osuna-Ramos JF

2022-11-13

Exploratory data (EDA)

Stats

Survival analyisis

KM plots

KM grid plots

COX-REGRESSION & FOREST PLOT ANALYSIS