Evaluación de Triaje

Author

Fernando

Carga de datos

Las variables que tenemos son:

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(haven)
library(ggstatsplot)

You can cite this package as:
     Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
     Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167

library(lme4)

Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack

# Load the SPSS dataset and add `Grupo`, a labelled factor built from the
# 0/1 codes stored in `GRUPO`.
dfRaw <- mutate(
  read_sav("TRIAGE_REDUCIDO.sav"),
  Grupo = factor(
    as.factor(GRUPO),
    levels = c(0, 1),
    labels = c("Sin checklist", "Con checklist")
  )
)

Hay variables que se usarán posteriormente y las aparto ahora:

# Subject-level table: every column of dfRaw except the per-actor columns
# and the TRI_* columns, which are handled in their own chunks.
dfTrans <- dfRaw %>%
  select(
    -starts_with(
      c("ARTURO", "MARIO", "LAURA", "JAVIER", "MARIA", "NICOLAS", "MATEO", "TRI_")
    )
  )

Hay varios actores que han sido evaluados por los sujetos de GRUPO=1. Cambiamos la estructura de los datos para adecuarla a formato tidy y poder hacer análisis de los datos posteriormente:

Los aciertos y errores de los sujetos se han recogido en variables que empiezan por TRI_ y que tienen un valor de 1 si el sujeto ha asignado el triaje correspondiente al actor y un 0 si no lo ha hecho. Vamos a cambiar la estructura de los datos para poder hacer análisis de los datos posteriormente:

# Long table of triage hits: one row per subject and actor, with TRIAJEOK
# holding the 0/1 value of the corresponding TRI_<ACTOR> column.
#
# Only the id/code columns and the TRI_* columns are coerced to integer.
# The previous `mutate_all(as.integer)` also converted the `Grupo` factor
# into its internal codes (1/2), discarding the labels created on load.
dfAciertoTriaje <- dfRaw %>%
  select(SUJETO, GRUPO, Grupo, starts_with("TRI_")) %>%
  mutate(across(c(SUJETO, GRUPO, starts_with("TRI_")), as.integer)) %>%
  pivot_longer(
    -c(SUJETO, GRUPO, Grupo),
    names_to = "ACTOR",
    values_to = "TRIAJEOK"
  ) %>%
  # Column names are TRI_<ACTOR>; keep only the actor name.
  mutate(ACTOR = str_remove(ACTOR, "TRI_"))

Por otro lado, independientemente del acierto, los sujetos decidieron dar una clasificación:

# Colour label attached to each actor; the vector order also fixes the
# level order of the `Actor` factor below.
colorActor <- c(
  JAVIER = "verde",
  LAURA = "verde",
  ARTURO = "amarillo",
  MARIO = "amarillo",
  NICOLAS = "amarillo",
  MARIA = "rojo",
  MATEO = "rojo"
)


# Long table of the triage category each subject assigned to each actor
# (PRI_<ACTOR> columns), with labelled versions of category and actor.
dfTriaje <- dfRaw %>%
  select(SUJETO, GRUPO, Grupo, starts_with("PRI_")) %>%
  # Every PRI_* column becomes a plain integer before reshaping.
  mutate(across(starts_with("PRI_"), as.integer)) %>%
  pivot_longer(
    cols = -c(SUJETO, GRUPO, Grupo),
    names_to = "ACTOR",
    values_to = "TRIAJE"
  ) %>%
  mutate(
    ACTOR = str_remove(ACTOR, "PRI_"),
    Triage = factor(
      as.factor(TRIAJE),
      levels = c(1, 2, 3, 4),
      labels = c("Verde", "Amarillo", "Rojo", "No etiquetado")
    ),
    Actor = factor(
      ACTOR,
      levels = names(colorActor),
      labels = sprintf("%s\n(%s)", names(colorActor), colorActor)
    )
  )

# Stacked bar chart of the assigned triage category by group.
# Colours are keyed by level name rather than by position (previously a
# reversed, unnamed vector), so each category keeps its colour whatever the
# level order is and even if a category is absent from the data.
dfTriaje %>%
  ggstatsplot::ggbarstats(y = Grupo, x = Triage, bf.message = FALSE) +
  scale_fill_manual(
    values = c(
      "Verde" = "darkgreen",
      "Amarillo" = "yellow",
      "Rojo" = "darkred",
      "No etiquetado" = "grey"
    )
  )

Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.

No esperaba que hubiese valores fuera de rango: 4. Aparentemente el valor “4” se ha asignado cuando un sujeto no sabía qué decir, y casi todos los casos han ocurrido en el GRUPO 1. Parece que eso se merece su propia variable:

# Split the out-of-range code 4 into its own indicator:
#   NOTRIAJE - 1 when the subject used code 4, 0 otherwise
#   TRIAJE   - code 4 replaced by NA
#   Duda     - labelled version of NOTRIAJE
# NOTRIAJE is computed first, while TRIAJE still holds the 4s.
dfTriaje <- dfTriaje %>%
  mutate(
    NOTRIAJE = as.integer(TRIAJE == 4),
    TRIAJE = na_if(TRIAJE, 4L),
    Duda = factor(NOTRIAJE, levels = c(0, 1), labels = c("No", "Sí"))
  )

# Proportion of "doubt" answers (code 4) within each group.
ggstatsplot::ggbarstats(
  data = dfTriaje,
  x = Duda,
  y = Grupo,
  bf.message = FALSE
)

Vamos a estudiar la situación a nivel de sujetos: los que han tenido dudas en algún caso frente a los que no las han tenido en ninguno:

# Subject-level view: did the subject answer "doubt" (NOTRIAJE == 1) at
# least once across the actors?
dfTriaje %>%
  group_by(Grupo, SUJETO) %>%
  summarise(
    `Alguna vez duda` = factor(
      # na.rm = TRUE: a missing answer for one actor must not turn the whole
      # subject into NA when other answers are available.
      as.integer(sum(NOTRIAJE, na.rm = TRUE) >= 1),
      levels = c(0, 1),
      labels = c("Nunca", "Alguna vez")
    ),
    # Return an ungrouped table; this also silences the summarise() message.
    .groups = "drop"
  ) %>%
  ggstatsplot::ggbarstats(y = Grupo, x = `Alguna vez duda`, bf.message = FALSE)

`summarise()` has grouped output by 'Grupo'. You can override using the
`.groups` argument.

ITEMS

# Lookup table: checklist item number (integer) -> display label.
dfITEM <- tibble(
  ITEM = 1:11,
  Item = c(
    "Item 1",
    "Ideas suicidio",
    "Ideas agresivas",
    "Item 4",
    "Item 5",
    "Fallecimiento reciente",
    "Item 7",
    "Item 8",
    "Psicofármacos",
    "Item 10",
    "Sentir culpa"
  )
)

dfITEM

# A tibble: 11 × 2
    ITEM Item                  
   <int> <chr>                 
 1     1 Item 1                
 2     2 Ideas suicidio        
 3     3 Ideas agresivas       
 4     4 Item 4                
 5     5 Item 5                
 6     6 Fallecimiento reciente
 7     7 Item 7                
 8     8 Item 8                
 9     9 Psicofármacos         
10    10 Item 10               
11    11 Sentir culpa

ANÁLISIS DEL ACTOR/TIPO DE CASO USANDO EL GRUPO 1

Me gustaría saber si ha tenido algo que ver el tipo de caso a estudiar (ACTOR) vía sus ITEMS. Para eso necesitamos datos extra:

# Per-actor columns for the GRUPO == 1 subjects only, keyed by SUJETO.
# The prefixes are listed in the order the columns should be selected.
dfActor <- dfRaw %>%
  filter(GRUPO == 1) %>%
  select(
    SUJETO,
    starts_with(
      c("ARTURO", "MARIO", "LAURA", "JAVIER", "MARIA", "NICOLAS", "MATEO")
    )
  )



# Colour label attached to each actor (same mapping as defined earlier in
# the document, repeated here).
colorActor <- c(
  JAVIER = "verde",
  LAURA = "verde",
  ARTURO = "amarillo",
  MARIO = "amarillo",
  NICOLAS = "amarillo",
  MARIA = "rojo",
  MATEO = "rojo"
)

# One row per subject, actor and item. The original column name is split
# into ACTOR and ITEM, and PRESENTE is recoded to 0/1 with a labelled copy.
dfLong <- dfActor %>%
  pivot_longer(cols = -SUJETO, names_to = "ITEM", values_to = "PRESENTE") %>%
  separate(ITEM, into = c("ACTOR", "ITEM")) %>%
  mutate(
    PRESENTE = as.integer(PRESENTE == 1),
    Presente = factor(
      as.factor(PRESENTE),
      levels = c(0, 1),
      labels = c("No", "Sí")
    )
  )


# Attach the triage answers and the item labels to the item-level table.
#
# dfLong only holds GRUPO == 1 subjects, but dfTriaje holds both groups and
# the join key is (SUJETO, ACTOR). dfTriaje is therefore restricted to
# GRUPO == 1 first, so that a subject id reused in the other group can never
# be matched (no effect if ids are unique across groups).
dfLong2 <- dfLong %>%
  inner_join(
    dfTriaje %>% filter(GRUPO == 1),
    by = c("SUJETO", "ACTOR")
  ) %>%
  # Drop item codes containing "C"; the remaining codes are converted to
  # integer item numbers.
  filter(!str_detect(ITEM, "C")) %>%
  mutate(ITEM = as.integer(ITEM)) %>%
  left_join(dfITEM, by = "ITEM") %>%
  mutate(
    Actor = factor(
      ACTOR,
      levels = names(colorActor),
      labels = sprintf("%s\n(%s)", names(colorActor), colorActor)
    )
  )

# Actor x item reference table read from the first sheet of the workbook.
# PRESENTEREAL is the sheet's PRESENTE column as integer
# (presumably the true presence of the item for that actor; confirm).
dfActorItem <- openxlsx::read.xlsx("tablaActores-ITEM.xlsx", 1) %>%
  transmute(
    ACTOR,
    ITEM = as.integer(ITEM),
    PRESENTEREAL = as.integer(PRESENTE)
  )
  

# Item numbers treated as discriminant.
itemDiscriminante <- c(2, 3, 6, 9, 11)

# (ACTOR, ITEM) pairs where a discriminant item has PRESENTEREAL == 1.
dfActorItemDiscriminante <- dfActorItem %>%
  filter(ITEM %in% itemDiscriminante, PRESENTEREAL == 1) %>%
  select(ACTOR, ITEM)

# Add the reference value of each item and flag agreement (itemOK) or
# disagreement (itemKO) between the subject's answer and that reference.
dfLong3 <- inner_join(dfLong2, dfActorItem, by = join_by(ACTOR, ITEM)) %>%
  mutate(
    itemOK = as.integer(PRESENTE == PRESENTEREAL),
    itemKO = as.integer(PRESENTE != PRESENTEREAL)
  )

# Labels and hit indicator used by the item-level plots:
#   ITEM_PRESENTEREAL - "<item, two digits>-<reference value>"
#   Presentereal      - labelled reference value
#   Item_Presentereal - "<item label>: <No/Sí>"
#   ACIERTO / Acierto - 1 / "Sí" when the answer is not NA and equals the
#                       reference value
dfLong4 <- dfLong3 %>%
  mutate(
    ITEM_PRESENTEREAL = sprintf("%02d-%d", ITEM, PRESENTEREAL),
    Presentereal = factor(
      PRESENTEREAL,
      levels = c(0, 1),
      labels = c("No", "Sí")
    ),
    Item_Presentereal = sprintf("%s: %s", Item, Presentereal),
    ACIERTO = as.integer(!is.na(PRESENTE) & PRESENTE == PRESENTEREAL),
    Acierto = factor(
      as.factor(ACIERTO),
      levels = c(0, 1),
      labels = c("No", "Sí")
    )
  )

# Hit rate per discriminant item, split by whether the item is really
# present.
#
# semi_join() keeps the dfLong4 rows whose ITEM appears in
# dfActorItemDiscriminante without adding rows. The previous inner_join()
# on ITEM alone repeated every row once per actor listed for that item,
# inflating the counts behind the chart and its statistics.
dfLong4 %>%
  semi_join(dfActorItemDiscriminante, by = join_by(ITEM)) %>%
  select(SUJETO, ACTOR, Item_Presentereal, Acierto) %>%
  arrange(ACTOR, Item_Presentereal) %>%
  ggbarstats(y = Item_Presentereal, x = Acierto, bf.message = FALSE) +
  # Vertical, larger x tick labels.
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 12)
  ) +
  xlab("")

No es relevante en ese gráfico el cálculo superior de la significancia estadística.

Creo que es mejor extraer lo relevante en dos gráficos:

# Item 9: hit rate depending on whether the item is really present.
p1 <- dfLong4 %>%
  filter(ITEM == 9) %>%
  select(SUJETO, ACTOR, Item_Presentereal, Acierto, Presentereal) %>%
  arrange(ACTOR, Item_Presentereal) %>%
  ggbarstats(x = Acierto, y = Presentereal, bf.message = FALSE) +
  labs(x = "Psicofármacos") +
  theme(axis.text.x = element_text(size = 12))
p1

Para la característica 6:

# Item 6: hit rate depending on whether the item is really present.
p2 <- dfLong4 %>%
  filter(ITEM == 6) %>%
  select(SUJETO, ACTOR, Item_Presentereal, Acierto, Presentereal) %>%
  arrange(ACTOR, Item_Presentereal) %>%
  ggbarstats(x = Acierto, y = Presentereal, bf.message = FALSE) +
  labs(x = "Fallecimiento reciente") +
  theme(axis.text.x = element_text(size = 12))
p2

# Show p1 and p2 side by side in a single figure.
gridExtra::grid.arrange(grobs = list(p1, p2), nrow = 1)

Para el resto no influye en la tasa de acierto el que esté presente o que no.

Análisis de triajes

Análisis actor a actor

En primer lugar comenzamos separando las respuestas por actores, ya que cada uno reflejaba aspectos diferentes y deberían objetivamente tener un valor de triaje diferente.

Realmente estos actores tienen una puntuación objetiva que los sujetos deberían haber asignado:

# Objective triage score that subjects should have assigned to each actor.
# NOTE(review): NICOLAS is scored 1 here, while `colorActor` labels NICOLAS
# as "amarillo" like ARTURO and MARIO (both scored 2). Confirm which one is
# right.
dfTriObj <- tribble(
  ~ACTOR,    ~TRIAJEOBJ,
  "ARTURO",  2,
  "MARIO",   2,
  "LAURA",   1,
  "JAVIER",  1,
  "MARIA",   3,
  "NICOLAS", 1,
  "MATEO",   3
)
dfTriObj

# A tibble: 7 × 2
  ACTOR   TRIAJEOBJ
  <chr>       <dbl>
1 ARTURO          2
2 MARIO           2
3 LAURA           1
4 JAVIER          1
5 MARIA           3
6 NICOLAS         1
7 MATEO           3

Vamos a evaluar los triajes que han hecho los individuos con respecto a los triajes objetivos y vamos a anotar las diferencias (si se han pasado, quedado cortos o acertado):

# Compare each assigned triage with the objective one.
#   DIFERENCIA             - assigned minus objective (positive = over-triage,
#                            negative = under-triage, NA when no category
#                            was assigned)
#   `Resultado del triage` - ordered factor describing that difference
#
# Two fixes with respect to the previous version:
#   * "Etiquetado con categoría correcta" is assigned only when
#     DIFERENCIA == 0. The former catch-all `TRUE ~ ...` also labelled rows
#     with a missing DIFERENCIA as correct.
#   * Levels run monotonically from the largest underestimate to the
#     largest overestimate, as an ordered factor requires. Before,
#     "Infraestimado 1" preceded "Infraestimado 2".
dfCompTriaje <- dfTriaje %>%
  inner_join(dfTriObj, by = "ACTOR") %>%
  mutate(
    DIFERENCIA = TRIAJE - TRIAJEOBJ,
    `Resultado del triage` = case_when(
      DIFERENCIA == 2 ~ "Sobreestimado 2 categorías",
      DIFERENCIA == 1 ~ "Sobreestimado 1 categoría",
      DIFERENCIA == 0 ~ "Etiquetado con categoría correcta",
      DIFERENCIA == -1 ~ "Infraestimado 1 categoría",
      DIFERENCIA == -2 ~ "Infraestimado 2 categorías"
    ),
    # Declaring the levels explicitly keeps all five categories in a fixed
    # order even when one of them does not occur in the data.
    `Resultado del triage` = factor(
      `Resultado del triage`,
      levels = c(
        "Infraestimado 2 categorías",
        "Infraestimado 1 categoría",
        "Etiquetado con categoría correcta",
        "Sobreestimado 1 categoría",
        "Sobreestimado 2 categorías"
      ),
      ordered = TRUE
    )
  )

Me gustaría comparar los aciertos según el GRUPO y según el ACTOR, ya que sabemos que algunos actores están muy relacionados con ciertas características que estaban presentes en los vídeos:

# Triage outcome per actor, both groups pooled.
# The diverging palette is keyed by level name (blue = underestimated,
# gray = correct, orange = overestimated), so each outcome keeps its colour
# whatever the level order is and even if an outcome is absent.
p3 <- dfCompTriaje %>%
  ggbarstats(y = Actor, x = `Resultado del triage`, bf.message = FALSE) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  xlab("") +
  scale_fill_manual(
    values = c(
      "Infraestimado 2 categorías" = "darkblue",
      "Infraestimado 1 categoría" = "blue",
      "Etiquetado con categoría correcta" = "gray",
      "Sobreestimado 1 categoría" = "orange",
      "Sobreestimado 2 categorías" = "darkorange"
    )
  )

Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.

# Display the per-actor outcome plot built from all of dfCompTriaje.
p3

Los aciertos corresponden al cero (en gris), que desde luego no es tan abundante como esperaba. En ocasiones aciertan incluso peor que eligiendo al azar (véase el caso de ARTURO).

Los valores positivos indican que el Sujeto se ha pasado en el valor del triaje con respecto al valor objetivo, y los negativos representan lo contrario. Solo en el caso de MARIO la gente se equivoca de forma simétrica. En los demás (salvo en el de NICOLAS) todo el mundo que se equivoca, o bien se pasa o bien se queda corto. En el caso de NICOLAS, la gente tiende a quedarse corta aunque un 6% se pasa. Estaría bien pensar sobre esto.

Vamos a desglosar esos patrones según GRUPO:

# Triage outcome per actor for GRUPO == 0 (no checklist).
# Note: this overwrites the pooled `p3` created earlier.
# The diverging palette is keyed by level name (blue = underestimated,
# gray = correct, orange = overestimated), so each outcome keeps its colour
# whatever the level order is and even if an outcome is absent from this
# subset.
p3 <- dfCompTriaje %>%
  filter(GRUPO == 0) %>%
  ggbarstats(
    y = Actor,
    x = `Resultado del triage`,
    bf.message = FALSE,
    title = "Casos sin Check-list"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  xlab("") +
  scale_fill_manual(
    values = c(
      "Infraestimado 2 categorías" = "darkblue",
      "Infraestimado 1 categoría" = "blue",
      "Etiquetado con categoría correcta" = "gray",
      "Sobreestimado 1 categoría" = "orange",
      "Sobreestimado 2 categorías" = "darkorange"
    )
  )

Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.

# Display the plot for the subjects without checklist (GRUPO == 0).
p3

# Triage outcome per actor for GRUPO == 1 (with checklist).
# The diverging palette is keyed by level name (blue = underestimated,
# gray = correct, orange = overestimated), so each outcome keeps its colour
# whatever the level order is and even if an outcome is absent from this
# subset.
p4 <- dfCompTriaje %>%
  filter(GRUPO == 1) %>%
  ggbarstats(
    y = Actor,
    x = `Resultado del triage`,
    bf.message = FALSE,
    title = "Casos con Check-list"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  xlab("") +
  scale_fill_manual(
    values = c(
      "Infraestimado 2 categorías" = "darkblue",
      "Infraestimado 1 categoría" = "blue",
      "Etiquetado con categoría correcta" = "gray",
      "Sobreestimado 1 categoría" = "orange",
      "Sobreestimado 2 categorías" = "darkorange"
    )
  )

Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.

# Display the plot for the subjects with checklist (GRUPO == 1).
p4

Los dos juntos:

# Stack p3 and p4 vertically in a single figure.
gridExtra::grid.arrange(grobs = list(p3, p4), nrow = 2)

Comparaciones de resultados totales en triajes

Ahora solo nos interesa saber qué resultado total hemos tenido en los triajes. Se puede evaluar de dos formas en principio: Mirando aciertos/errores o bien viendo el error medio (en valor absoluto) cometido donde error 0 es acierto.

# Per-subject totals, computed on rows without any missing value:
#   PCTACIERTOS - percentage of actors triaged exactly right
#   ERRORMEDIO  - mean absolute difference from the objective triage
dfTriajeTotal <- dfCompTriaje %>%
  drop_na() %>%
  group_by(GRUPO, Grupo, SUJETO) %>%
  summarise(
    # Rows with NA were removed above, so n() is the number of non-missing
    # DIFERENCIA values.
    PCTACIERTOS = 100 * sum(DIFERENCIA == 0) / n(),
    ERRORMEDIO = mean(abs(DIFERENCIA)),
    .groups = "drop"
  )

Comparemos ambos grupos en ambas medidas:

# Between-group comparison of the per-subject percentage of correct triages.
ggbetweenstats(
  data = dfTriajeTotal,
  x = Grupo,
  y = PCTACIERTOS,
  bf.message = FALSE
) +
  labs(x = "", y = "Porcentaje de aciertos")

# Between-group comparison of the per-subject mean absolute triage error.
ggbetweenstats(
  data = dfTriajeTotal,
  x = Grupo,
  y = ERRORMEDIO,
  bf.message = FALSE
) +
  labs(x = "", y = "Error medio cometido")

Modelo multinivel

En este usamos otra base de datos. Consideramos a los actores como miembros de un conjunto infinito de actores random que van a ser evaluados por los sujetos. Con ellos podemos acertar o no en el triaje.

La base de datos sería la siguiente:

# Modelling dataset: one row per subject and actor with the hit indicator
# ACIERTO (1 when DIFERENCIA == 0) plus the subject-level covariates.
#
# The join keys are spelled out instead of relying on a natural join, so a
# column later added to both tables cannot silently change the key.
# `relationship = "many-to-one"` makes the join fail loudly if dfTrans ever
# has more than one row per (GRUPO, SUJETO), rather than duplicating rows.
dfMultinivel <- dfCompTriaje %>%
  select(GRUPO, SUJETO, ACTOR, DIFERENCIA) %>%
  mutate(ACIERTO = as.integer(DIFERENCIA == 0)) %>%
  inner_join(
    dfTrans %>%
      select(
        GRUPO, SUJETO, PROFESION, SEXO, EDAD,
        TMMS_ATENCION, TMMS_CLARIDAD, TMMS_REGULACION,
        RESILIENCIA, PSICOLOGIA, TOTALCOMP
      ),
    by = join_by(GRUPO, SUJETO),
    relationship = "many-to-one"
  )

Joining with `by = join_by(GRUPO, SUJETO)`

# Print the modelling dataset.
dfMultinivel

# A tibble: 819 × 14
   GRUPO   SUJETO ACTOR DIFERENCIA ACIERTO PROFESION SEXO     EDAD TMMS_ATENCION
   <dbl+l>  <dbl> <chr>      <dbl>   <int> <dbl+lbl> <dbl+l> <dbl>         <dbl>
 1 1 [CON…      1 ARTU…         -1       0 2 [MÉDIC… 1 [VAR…    60             8
 2 1 [CON…      1 MARIO         -1       0 2 [MÉDIC… 1 [VAR…    60             8
 3 1 [CON…      1 LAURA          0       1 2 [MÉDIC… 1 [VAR…    60             8
 4 1 [CON…      1 JAVI…          0       1 2 [MÉDIC… 1 [VAR…    60             8
 5 1 [CON…      1 MARIA         -1       0 2 [MÉDIC… 1 [VAR…    60             8
 6 1 [CON…      1 NICO…          0       1 2 [MÉDIC… 1 [VAR…    60             8
 7 1 [CON…      1 MATEO         -2       0 2 [MÉDIC… 1 [VAR…    60             8
 8 1 [CON…      2 ARTU…         -1       0 2 [MÉDIC… 2 [MUJ…    49            16
 9 1 [CON…      2 MARIO         -1       0 2 [MÉDIC… 2 [MUJ…    49            16
10 1 [CON…      2 LAURA          0       1 2 [MÉDIC… 2 [MUJ…    49            16
# ℹ 809 more rows
# ℹ 5 more variables: TMMS_CLARIDAD <dbl>, TMMS_REGULACION <dbl>,
#   RESILIENCIA <dbl>, PSICOLOGIA <dbl+lbl>, TOTALCOMP <dbl>

Ahora hacemos el modelo de regresión logístico multinivel usando el ACTOR como un factor RANDOM:

# Mixed-effects logistic regression (logit link) for a correct triage
# (ACIERTO), with the listed covariates as fixed effects and a random
# intercept per ACTOR. TOTALCOMP is commented out of the formula.
# NOTE(review): each SUJETO contributes one row per actor, but the model has
# no subject-level random effect. Confirm whether a term such as (1|SUJETO)
# is needed to account for the repeated measures.
modeloMultinivel=glmer(ACIERTO ~ GRUPO+SEXO+EDAD+TMMS_ATENCION+TMMS_CLARIDAD+TMMS_REGULACION+RESILIENCIA+PSICOLOGIA+#TOTALCOMP+
                         (1|ACTOR), data = dfMultinivel, family = binomial(link = "logit"))

# Print the fitted-model summary.
summary(modeloMultinivel)

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ACIERTO ~ GRUPO + SEXO + EDAD + TMMS_ATENCION + TMMS_CLARIDAD +  
    TMMS_REGULACION + RESILIENCIA + PSICOLOGIA + (1 | ACTOR)
   Data: dfMultinivel

     AIC      BIC   logLik deviance df.resid 
  1046.0   1092.3   -513.0   1026.0      751 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.9046 -0.9356 -0.6215  0.9678  1.5628 

Random effects:
 Groups Name        Variance Std.Dev.
 ACTOR  (Intercept) 0.0972   0.3118  
Number of obs: 761, groups:  ACTOR, 7

Fixed effects:
                 Estimate Std. Error z value Pr(>|z|)   
(Intercept)     -0.119327   0.684137  -0.174  0.86154   
GRUPO            0.462878   0.156869   2.951  0.00317 **
SEXO            -0.092516   0.170228  -0.543  0.58680   
EDAD             0.003273   0.006861   0.477  0.63334   
TMMS_ATENCION    0.031012   0.023070   1.344  0.17886   
TMMS_CLARIDAD    0.021772   0.026276   0.829  0.40733   
TMMS_REGULACION  0.030599   0.025183   1.215  0.22433   
RESILIENCIA     -0.045661   0.016807  -2.717  0.00659 **
PSICOLOGIA       0.170794   0.160447   1.064  0.28711   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) GRUPO  SEXO   EDAD   TMMS_A TMMS_C TMMS_R RESILI
GRUPO       -0.021                                                 
SEXO        -0.469 -0.097                                          
EDAD        -0.639 -0.237  0.260                                   
TMMS_ATENCI -0.228  0.085 -0.313  0.245                            
TMMS_CLARID -0.107 -0.090  0.030 -0.048 -0.299                     
TMMS_REGULA -0.172  0.005  0.117  0.176 -0.008 -0.174              
RESILIENCIA -0.330  0.055  0.070 -0.075 -0.103 -0.214 -0.484       
PSICOLOGIA   0.068  0.091 -0.010 -0.106  0.092 -0.139  0.045 -0.142

El estilo de publicaciones es algo así:

# Publication-style table on the log(OR) scale.
# `digits` is not a tbl_regression() argument, so it had no effect on the
# printed table. The number of decimals is set through `estimate_fun`
# (estimates and confidence limits) and `pvalue_fun` (p-values).
gtsummary::tbl_regression(
  modeloMultinivel,
  tidy_fun = broom.mixed::tidy,
  estimate_fun = function(x) gtsummary::style_number(x, digits = 3),
  pvalue_fun = function(x) gtsummary::style_pvalue(x, digits = 3)
)

Characteristic	log(OR)¹	95% CI¹	p-value
GRUPO	0.46	0.16, 0.77	0.003
GÉNERO	-0.09	-0.43, 0.24	0.6
EDAD	0.00	-0.01, 0.02	0.6
FACTOR ATENCIÓN INT. EMOCIONAL	0.03	-0.01, 0.08	0.2
FACTOR CLARIDAD INT. EMOCIONAL	0.02	-0.03, 0.07	0.4
FACTOR REGULACIÓN INT. EMOCIONAL	0.03	-0.02, 0.08	0.2
RESILIENCIA	-0.05	-0.08, -0.01	0.007
GRADUADO PSICOLOGÍA	0.17	-0.14, 0.49	0.3
ACTOR.sd__(Intercept)	0.31
¹ OR = Odds Ratio, CI = Confidence Interval

O así:

# Publication-style table on the odds-ratio scale (exponentiated
# coefficients).
# `digits` is not a tbl_regression() argument, so it had no effect on the
# printed table. The number of decimals is set through `estimate_fun`
# (estimates and confidence limits) and `pvalue_fun` (p-values).
gtsummary::tbl_regression(
  modeloMultinivel,
  exponentiate = TRUE,
  tidy_fun = broom.mixed::tidy,
  estimate_fun = function(x) gtsummary::style_number(x, digits = 3),
  pvalue_fun = function(x) gtsummary::style_pvalue(x, digits = 3)
)

Characteristic	OR¹	95% CI¹	p-value
GRUPO	1.59	1.17, 2.16	0.003
GÉNERO	0.91	0.65, 1.27	0.6
EDAD	1.00	0.99, 1.02	0.6
FACTOR ATENCIÓN INT. EMOCIONAL	1.03	0.99, 1.08	0.2
FACTOR CLARIDAD INT. EMOCIONAL	1.02	0.97, 1.08	0.4
FACTOR REGULACIÓN INT. EMOCIONAL	1.03	0.98, 1.08	0.2
RESILIENCIA	0.96	0.92, 0.99	0.007
GRADUADO PSICOLOGÍA	1.19	0.87, 1.62	0.3
ACTOR.sd__(Intercept)	0.31
¹ OR = Odds Ratio, CI = Confidence Interval

O así:

# Alternative presentation: summarise the fitted model as an sjPlot table.
sjPlot::tab_model(modeloMultinivel)

	ACIERTO
Predictors	Odds Ratios	CI	p
(Intercept)	0.89	0.23 – 3.39	0.862
GRUPO	1.59	1.17 – 2.16	0.003
GÉNERO	0.91	0.65 – 1.27	0.587
EDAD	1.00	0.99 – 1.02	0.633
FACTOR ATENCIÓN INT. EMOCIONAL	1.03	0.99 – 1.08	0.179
FACTOR CLARIDAD INT. EMOCIONAL	1.02	0.97 – 1.08	0.407
FACTOR REGULACIÓN INT. EMOCIONAL	1.03	0.98 – 1.08	0.224
RESILIENCIA	0.96	0.92 – 0.99	0.007
GRADUADO PSICOLOGÍA	1.19	0.87 – 1.62	0.287
Random Effects
σ²	3.29
τ₀₀ _ACTOR	0.10
ICC	0.03
N _ACTOR	7
Observations	761
Marginal R² / Conditional R²	0.032 / 0.060