Análisis de datos Dr. Varela

mi asesor me pidió, 1. dividir en grupo pacientes con mas de tres cruces y menos de tres cruces, ver si entre estos existe diferencia significativa en el tiempo que alcanzar a negativizar … 2. relación entre el indice baciloscópico, morfologico y tintorial a través del tiempo (esto es complicado porque no todas las baciloscopias se hacen de manera heterogenea, hay pacientes que regresan a su seguimiento al año)…

Objetivo 1.

Dividir en grupo de pacientes (creamos la columna divi)

<3 cruces

>3 cruces

#Dividir en grupo de pacientes
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(gtsummary)
library(readxl)
library(ggpubr)
library(dlookr)
## 
## Attaching package: 'dlookr'
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## The following object is masked from 'package:base':
## 
##     transform
library(gtable)
library(gt)
dbtb <- read_excel("C:/Users/fidel/OneDrive - CINVESTAV/PROYECTO MDatos/TRABAJOS/Dr. Lauro Varela Hacer grafica/dbtb.xlsx")



dbtb <- dbtb %>% mutate(Grupo = case_when(
  Average <3 ~ "<3",
  Average >=3 ~ ">3"))

dbtb %>% gt()
IBINI IB2 IB3 IB4 IB5 Average mesesneg añosneg IMIINI ITIINI ENTINI FRAGINI GRANINI NORMINI n2a Grupo
2 3 0 3 2 2.000000 25.00000 3 50 50 50 30 20 50 0 <3
2 0 0 0 0 0.400000 13.00000 2 50 80 50 30 20 80 1 <3
2 3 1 0 0 1.200000 20.00000 2 30 50 30 40 20 30 1 <3
2 3 1 2 0 1.600000 37.01918 4 50 50 50 30 20 50 0 <3
3 3 0 0 0 1.200000 9.00000 1 50 80 50 30 20 80 1 <3
3 3 3 1 2 2.400000 30.01644 3 20 80 20 50 30 80 0 <3
3 2 3 1 1 2.000000 23.83562 2 20 80 20 50 30 80 0 <3
3 4 5 4 3 3.800000 NA NA 30 50 30 50 20 50 0 >3
3 4 2 2 2 2.600000 26.49863 3 80 100 80 10 10 100 0 <3
3 1 0 0 0 0.800000 10.00000 1 50 100 50 30 20 0 1 <3
3 4 5 4 0 3.200000 20.00000 2 50 50 50 30 20 50 1 >3
3 4 4 1 1 2.600000 32.94247 3 50 50 50 50 30 20 0 <3
3 0 1 0 0 0.800000 19.00000 2 50 80 50 30 20 80 1 <3
3 2 0 0 0 1.000000 16.00000 2 20 0 20 50 30 20 1 <3
3 4 1 3 0 2.200000 31.00000 3 50 100 50 30 20 60 0 <3
3 0 NA NA NA 1.500000 12.00000 2 80 100 80 15 5 60 1 <3
3 3 2 0 0 1.600000 30.00000 3 70 70 70 30 0 70 0 <3
3 3 0 2 0 1.600000 23.00000 2 80 80 80 20 0 80 1 <3
4 4 2 3 0 2.600000 30.37808 3 50 100 50 30 20 100 0 <3
4 2 2 0 0 1.600000 21.99452 2 80 100 80 10 10 60 0 <3
4 2 2 3 0 2.200000 20.74521 2 50 50 50 30 20 50 0 <3
4 4 3 3 0 2.800000 34.00000 3 60 50 60 30 10 50 0 <3
4 0 2 2 0 1.600000 25.00000 3 100 100 100 0 0 100 0 <3
4 2 2 3 2 2.600000 38.43288 4 50 70 50 70 50 50 0 <3
4 5 3 2 3 3.400000 35.27671 3 70 80 70 20 10 80 0 >3
4 4 3 4 3 3.600000 NA NA 20 80 20 50 30 80 0 >3
4 3 2 0 2 2.200000 52.47123 5 60 50 60 30 10 60 0 <3
4 3 1 2 0 2.000000 39.18904 4 80 80 80 10 10 80 0 <3
5 4 4 2 0 3.000000 25.31507 3 80 50 80 20 0 50 0 >3
5 3 4 2 2 3.200000 44.87671 4 80 50 80 20 0 50 1 >3
5 0 3 0 0 1.600000 35.80274 3 100 100 100 0 0 100 0 <3
5 6 0 6 5 4.400000 NA NA 70 80 70 20 10 40 0 >3
5 4 0 4 2 3.000000 57.10685 5 50 50 50 50 0 50 1 >3
5 2 3 2 2 2.800000 52.27397 5 90 20 90 10 0 20 0 <3
5 4 2 2 4 3.400000 48.85479 5 50 80 50 30 20 50 0 >3
5 4 4 3 2 3.600000 NA NA 70 80 70 20 10 80 0 >3
5 3 2 0 0 2.000000 20.02192 2 30 50 30 30 40 50 1 <3
5 1 3 0 0 1.800000 17.75342 2 80 80 80 10 10 80 0 <3
5 2 0 4 1 2.400000 27.91233 3 70 80 70 20 10 80 1 <3
6 4 4 3 4 4.200000 NA NA 80 90 80 20 0 90 0 >3
6 2 3 NA NA 3.666667 NA NA 90 20 90 10 0 20 0 >3
6 4 5 3 4 4.400000 NA NA 80 50 80 10 10 50 0 >3
plotgrupo<-dbtb %>% 
  group_by(Grupo) %>%
  summarise(counts = n())

ggplot(plotgrupo, aes(x = Grupo, y = counts, fill= Grupo)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = counts), vjust = -0.3, fontface=2, size= 6) + 
  theme_pubclean() + ylab("conteo") + xlab ("Grupo BI")+
  theme(text = element_text(size = 16, face="bold"), axis.text = element_text(size = 16, face="bold"),
        legend.text = element_text(size = 12),)+ scale_fill_brewer(palette="Paired")+
  theme(axis.text.x = element_text(angle = 0, hjust = 1))+theme(legend.position = "none")

Hacemos el grafico

Curvas KM, tomando en cuenta negativo 2 años

library("ggplot2")
library("ggpubr")
library("survival")
library("survminer")
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma
library("tidyverse")
library("dlookr")
library("readxl")


dbtb$n2a = as.numeric(dbtb$n2a)

str(dbtb)
## tibble [42 × 16] (S3: tbl_df/tbl/data.frame)
##  $ IBINI   : num [1:42] 2 2 2 2 3 3 3 3 3 3 ...
##  $ IB2     : num [1:42] 3 0 3 3 3 3 2 4 4 1 ...
##  $ IB3     : num [1:42] 0 0 1 1 0 3 3 5 2 0 ...
##  $ IB4     : num [1:42] 3 0 0 2 0 1 1 4 2 0 ...
##  $ IB5     : num [1:42] 2 0 0 0 0 2 1 3 2 0 ...
##  $ Average : num [1:42] 2 0.4 1.2 1.6 1.2 2.4 2 3.8 2.6 0.8 ...
##  $ mesesneg: num [1:42] 25 13 20 37 9 ...
##  $ añosneg : num [1:42] 3 2 2 4 1 3 2 NA 3 1 ...
##  $ IMIINI  : num [1:42] 50 50 30 50 50 20 20 30 80 50 ...
##  $ ITIINI  : num [1:42] 50 80 50 50 80 80 80 50 100 100 ...
##  $ ENTINI  : num [1:42] 50 50 30 50 50 20 20 30 80 50 ...
##  $ FRAGINI : num [1:42] 30 30 40 30 30 50 50 50 10 30 ...
##  $ GRANINI : num [1:42] 20 20 20 20 20 30 30 20 10 20 ...
##  $ NORMINI : num [1:42] 50 80 30 50 80 80 80 50 100 0 ...
##  $ n2a     : num [1:42] 0 1 1 0 1 0 0 0 0 1 ...
##  $ Grupo   : chr [1:42] "<3" "<3" "<3" "<3" ...
fit <- survfit(Surv(mesesneg, n2a) ~ Grupo, data = dbtb)
print(fit)
## Call: survfit(formula = Surv(mesesneg, n2a) ~ Grupo, data = dbtb)
## 
##    7 observations deleted due to missingness 
##           n events median 0.95LCL 0.95UCL
## Grupo=<3 29     10     NA    27.9      NA
## Grupo=>3  6      3   57.1    44.9      NA
# Summary of survival curves
summary(fit)
## Call: survfit(formula = Surv(mesesneg, n2a) ~ Grupo, data = dbtb)
## 
## 7 observations deleted due to missingness 
##                 Grupo=<3 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   9.0     29       1    0.966  0.0339        0.901        1.000
##  10.0     28       1    0.931  0.0471        0.843        1.000
##  12.0     27       1    0.897  0.0566        0.792        1.000
##  13.0     26       1    0.862  0.0640        0.745        0.997
##  16.0     25       1    0.828  0.0701        0.701        0.977
##  19.0     23       1    0.792  0.0758        0.656        0.955
##  20.0     22       1    0.756  0.0804        0.613        0.931
##  20.0     21       1    0.720  0.0842        0.572        0.905
##  23.0     18       1    0.680  0.0885        0.526        0.877
##  27.9     13       1    0.627  0.0959        0.465        0.847
## 
##                 Grupo=>3 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##  20.0      6       1    0.833   0.152        0.583            1
##  44.9      3       1    0.556   0.248        0.231            1
##  57.1      1       1    0.000     NaN           NA           NA
# Access to the sort summary table
summary(fit)$table
##          records n.max n.start events    rmean se(rmean)   median  0.95LCL
## Grupo=<3      29    29      29     10 42.39915  3.758969       NA 27.91233
## Grupo=>3       6     6       6      3 47.52511  5.740091 57.10685 44.87671
##          0.95UCL
## Grupo=<3      NA
## Grupo=>3      NA
#Access to the value returned by survfit()

d <- data.frame(time = fit$time,
                n.risk = fit$n.risk,
                n.event = fit$n.event,
                n.censor = fit$n.censor,
                surv = fit$surv,
                upper = fit$upper,
                lower = fit$lower
)
head(d)
##       time n.risk n.event n.censor      surv     upper     lower
## 1  9.00000     29       1        0 0.9655172 1.0000000 0.9013401
## 2 10.00000     28       1        0 0.9310345 1.0000000 0.8432302
## 3 12.00000     27       1        0 0.8965517 1.0000000 0.7922890
## 4 13.00000     26       1        0 0.8620690 0.9971665 0.7452746
## 5 16.00000     25       1        0 0.8275862 0.9771457 0.7009179
## 6 17.75342     24       0        1 0.8275862 0.9771457 0.7009179
#Visualize survival curves


# Change color, linetype by strata, risk.table color by strata
ggsurv<-ggsurvplot(fit,
                   pval = TRUE, pval.method = T, conf.int = F, xlab = "Meses", fontsize = 5,size = 1.5, ylab ="Probabilidad de negativización a 2 años", legend.labs = c("BI <3", "BI >3"),
                   break.time.by = 1, legend.title = "",
                   pval.coord = c(8, 0.1), pval.method.coord = c(8,0.15),
                   risk.table = T, # Add risk table
                   risk.table.col = "strata", # Change risk table color by groups
                   linetype = "strata", # Change line type by groups # Specify median survival
                   xlim = c(8, 16),
                   font.submain = c(12),
                   font.title = c(12,color = "black", face = "bold" ),
                   font.subtitle = c(12),
                   font.caption = c(12),
                   font.x = c(16,"bold.italic"),
                   font.y = c(16,"bold.italic"),
                   font.tickslab = c(12))


ggsurv$plot <- ggsurv$plot + 
  theme(legend.text = element_text(size = 12, color = "black", face = "bold"))
ggsurv