covidtableandboxplot

EDA with tables

Cargamos la base de datos en R

library(readxl)

## Warning: package 'readxl' was built under R version 4.1.2

# Read the Excel workbook with readxl::read_excel(); the str() output below
# shows the result is a tibble.
# NOTE(review): the path is absolute and user-specific — consider a
# project-relative path so the report can be re-run elsewhere.
DBmodR <- read_excel("C:/Users/fidel/OneDrive - CINVESTAV/NLPR, LPR, NPR ratio in COVID-19 and DENGUE PAPER/PAPER/3erround/DBmodR.xlsx")

# Work on a copy named DB; the later recoding and renaming steps modify DB.
DB<-DBmodR

# Inspect the column names and types of the imported data.
str(DB)

## tibble [288 x 10] (S3: tbl_df/tbl/data.frame)
##  $ SEXO       : chr [1:288] "F" "M" "F" "F" ...
##  $ EDAD       : num [1:288] 29 33 46 43 90 24 56 29 33 59 ...
##  $ DX         : chr [1:288] "FD" "FD" "FD" "FD" ...
##  $ SEVERITY   : chr [1:288] "Non-severe" "Non-severe" "Non-severe" "Non-severe" ...
##  $ PLAQUETAS  : num [1:288] 44 80 110 205 229 233 189 293 152 280 ...
##  $ LINFOCITOS : num [1:288] 2.26 0.83 0.63 1.33 1.04 0.36 3.55 1.22 1.54 1.03 ...
##  $ NEUTROFILOS: num [1:288] 3.17 2.08 2.11 7.6 3.97 ...
##  $ NPLR       : num [1:288] 3.19 3.13 3.04 2.79 1.67 ...
##  $ NLR        : num [1:288] 1.4 2.51 3.35 5.71 3.82 ...
##  $ LPR        : num [1:288] 19.5 96.4 174.6 154.1 220.2 ...

Hacemos limpieza de la base de datos con Tidyverse

Recodificamos las variables DX y SEVERITY

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.2     v dplyr   1.0.8
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1

## Warning: package 'readr' was built under R version 4.1.1

## Warning: package 'dplyr' was built under R version 4.1.3

## Warning: package 'stringr' was built under R version 4.1.3

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

# Harmonise the category labels in one pass:
#   DX       - "FD" and "FHD" are both relabelled "Dengue"
#   SEVERITY - "NO CRIT" is relabelled "Non-critical"
# Values not listed in a recode() call are left as they are.
DB <- DB %>%
  mutate(
    DX = recode(DX, `FD` = "Dengue", `FHD` = "Dengue"),
    SEVERITY = recode(SEVERITY, `NO CRIT` = "Non-critical")
  )

EDA

Haremos una tabla con el paquete gtsummary

library(gtsummary)

## #BlackLivesMatter

# SEVERITY was already recoded ("NO CRIT" -> "Non-critical") right after the
# tidyverse was loaded, so the recode is not repeated here.

# Display labels for the ten columns of DB, in column order.
# DBCLINICR holds only this character vector of labels, not a data frame.
DBCLINICR <- c(
  "Sex", "Age (years)", "DX", "Severity", "Platelets",
  "Lymphocytes", "Neutrophils", "NPLR", "NLR", "LPR"
)

# Apply the labels to DB; the table and plot code below use these names.
colnames(DB) <- DBCLINICR
str(DBCLINICR)

##  chr [1:10] "Sex" "Age (years)" "DX" "Severity" "Platelets" "Lymphocytes" ...

# Descriptive table stratified by diagnosis (DX).
DB %>%
  select(
    Sex, Severity, Platelets, Lymphocytes, Neutrophils,
    NPLR, NLR, LPR, DX
  ) %>%
  tbl_summary(
    # one column per DX group
    by = DX,
    statistic = list(
      # continuous columns: median, with the value of IQR() in parentheses
      all_continuous() ~ "{median} ({IQR})",
      # categorical columns: count and percentage
      all_categorical() ~ "{n} ({p}%)"
    ),
    # one decimal place for the continuous summaries
    digits = all_continuous() ~ 1,
    # show every level of the categorical columns
    type = all_categorical() ~ "categorical"
  ) %>%
  # p-value column comparing the DX groups
  add_p() %>%
  # "Overall" column for the whole sample
  add_overall()

Characteristic	Overall, N = 288¹	COVID-19, N = 105¹	Dengue, N = 183¹	p-value²
Sex				<0.001
F	163 (57%)	41 (39%)	122 (67%)
M	125 (43%)	64 (61%)	61 (33%)
Severity				<0.001
Non-critical	105 (36%)	105 (100%)	0 (0%)
Non-severe	183 (64%)	0 (0%)	183 (100%)
Platelets	168.5 (159.5)	286.0 (179.0)	125.0 (109.0)	<0.001
Lymphocytes	0.9 (0.9)	1.0 (0.8)	0.9 (0.9)	0.7
Neutrophils	3.2 (6.1)	9.0 (6.5)	2.0 (1.9)	<0.001
NPLR	2.5 (3.6)	3.2 (4.5)	2.1 (2.9)	<0.001
NLR	3.7 (6.7)	8.8 (11.8)	2.1 (2.7)	<0.001
LPR	183.6 (228.8)	323.0 (276.4)	155.2 (157.0)	<0.001
¹ n (%); Median (IQR)
² Pearson's Chi-squared test; Wilcoxon rank sum test

## Boxplot

# Wide table for the boxplots: the diagnosis plus the six blood-count
# parameters. The NPLR column is relabelled "NLPR" here, as before.
dbgatc <- DB %>%
  select(DX, Platelets, Lymphocytes, Neutrophils, NLPR = NPLR, NLR, LPR)

# Long format: one row per (patient, parameter) pair. The parameter name goes
# to `blodcountpar` and its value to `level`; rows with a missing value are
# dropped. pivot_longer() replaces the superseded gather().
DBgat <- dbgatc %>%
  pivot_longer(
    cols = -DX,
    names_to = "blodcountpar",
    values_to = "level",
    values_drop_na = TRUE
  )

## Creamos el boxplot una vez limpia la base de datos

Usaremos el paquete ggplot2 para graficar; además, usaremos ggpubr para agregar los valores de significancia estadística.

#boxplot only 

library(ggplot2)
library(ggpubr)
library(rstatix)

## 
## Attaching package: 'rstatix'

## The following object is masked from 'package:stats':
## 
##     filter

# Text size of the significance labels added by stat_compare_means().
your_font_size <- 5

# One free-scale panel per blood-count parameter, comparing Dengue and
# COVID-19 with a Wilcoxon test shown as significance symbols.
DBgat %>%
  mutate(
    DX = factor(DX, levels = c("Dengue", "COVID-19")),
    # Panel order is controlled through the factor levels; an `order`
    # argument given to ggplot() is silently ignored. The last label is
    # "NLPR" because that is the name the reshaping step gives the column.
    blodcountpar = factor(
      blodcountpar,
      levels = c("Neutrophils", "Lymphocytes", "Platelets", "NLR", "LPR", "NLPR")
    )
  ) %>%
  ggplot(aes(x = blodcountpar, y = level, fill = DX)) +
  geom_boxplot() +
  geom_jitter(shape = 16, position = position_jitter(), alpha = 0.2) +
  stat_compare_means(
    aes(group = DX),
    method = "wilcox.test",
    label = "p.signif",
    hide.ns = FALSE,
    size = your_font_size
  ) +
  scale_fill_grey(start = 0.9, end = 0.5) +
  labs(x = "", y = "", fill = "") +
  # The complete theme goes first: adding it after theme() tweaks would
  # overwrite them (this is why the x-axis rotation was previously lost).
  theme_classic2() +
  theme(
    text = element_text(size = 20),
    axis.text = element_text(size = 20),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.text = element_text(size = 20),
    # remove the facet strips; the x-axis label already names each panel
    strip.background = element_blank(),
    strip.text.x = element_blank()
  ) +
  facet_wrap(~ blodcountpar, scales = "free")

covidtableandboxplot

Fidel OsunaR

23/5/2022

EDA with tables

Hacemos limpieza de la base de datos con Tidyverse

EDA