ARM

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.2.3

## Warning: package 'ggplot2' was built under R version 4.2.3

## Warning: package 'tibble' was built under R version 4.2.3

## Warning: package 'tidyr' was built under R version 4.2.3

## Warning: package 'readr' was built under R version 4.2.3

## Warning: package 'purrr' was built under R version 4.2.3

## Warning: package 'dplyr' was built under R version 4.2.3

## Warning: package 'forcats' was built under R version 4.2.3

## Warning: package 'lubridate' was built under R version 4.2.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors

library(ggpubr)

## Warning: package 'ggpubr' was built under R version 4.2.3

library(haven)

## Warning: package 'haven' was built under R version 4.2.3

DATOS_RESISTENCIA_BACTERIANA <- read_sav("C:/Users/fidel/OneDrive - UNIVERSIDAD AUTONOMA DE SINALOA/INVESTIGACIÓN DGPI/DATOS_RESISTENCIA_BACTERIANA.sav")
  db<-DATOS_RESISTENCIA_BACTERIANA

head(db)

## # A tibble: 6 × 35
##   FOLIO FECHA      NOMBRE       SEXO     EDAD EDAD1   CULTIVO BACTERIA BACTERIA1
##   <dbl> <date>     <chr>        <dbl+l> <dbl> <dbl+l> <dbl+l> <dbl+lb> <dbl+lbl>
## 1     1 2022-02-22 CLAUDIA MAB… 1 [FEM…    49 3 [> 3… 1 [URO… 1 [ESCH… 1 [GRAM …
## 2     2 2022-02-22 JOSE PATIÑO  2 [MAS…    34 2 [20-… 2 [EXU… 4 [KLEB… 1 [GRAM …
## 3     3 2022-02-22 ANA KAREN D… 1 [FEM…    33 2 [20-… 1 [URO… 9 [LEVA… 3 [LEVAD…
## 4     4 2022-02-22 EVANGELINA … 1 [FEM…    53 3 [> 3… 1 [URO… 1 [ESCH… 1 [GRAM …
## 5     5 2022-02-22 ALEJANDRA R… 1 [FEM…    45 2 [20-… 1 [URO… 1 [ESCH… 1 [GRAM …
## 6     6 2022-02-22 MARIA RAMIR… 1 [FEM…    33 2 [20-… 1 [URO… 6 [S. A… 2 [GRAM …
## # ℹ 26 more variables: LEUCOCITOS <dbl+lbl>, ERITROCITOS <dbl+lbl>, PH <dbl>,
## #   NITRITOS <dbl+lbl>, PROTEINAS <dbl+lbl>, GLUCOSA <dbl+lbl>,
## #   SANGRE <dbl+lbl>, AK <dbl+lbl>, AM <dbl+lbl>, CB <dbl+lbl>, CF <dbl+lbl>,
## #   CFX <dbl+lbl>, CPF <dbl+lbl>, CL <dbl+lbl>, GENT <dbl+lbl>, NET <dbl+lbl>,
## #   NF <dbl+lbl>, NOF <dbl+lbl>, SXT <dbl+lbl>, CLM <dbl+lbl>, DC <dbl+lbl>,
## #   E <dbl+lbl>, PE <dbl+lbl>, TE <dbl+lbl>, VA <dbl+lbl>,
## #   RESISTENCIA <dbl+lbl>

head(db)

## # A tibble: 6 × 35
##   FOLIO FECHA      NOMBRE       SEXO     EDAD EDAD1   CULTIVO BACTERIA BACTERIA1
##   <dbl> <date>     <chr>        <dbl+l> <dbl> <dbl+l> <dbl+l> <dbl+lb> <dbl+lbl>
## 1     1 2022-02-22 CLAUDIA MAB… 1 [FEM…    49 3 [> 3… 1 [URO… 1 [ESCH… 1 [GRAM …
## 2     2 2022-02-22 JOSE PATIÑO  2 [MAS…    34 2 [20-… 2 [EXU… 4 [KLEB… 1 [GRAM …
## 3     3 2022-02-22 ANA KAREN D… 1 [FEM…    33 2 [20-… 1 [URO… 9 [LEVA… 3 [LEVAD…
## 4     4 2022-02-22 EVANGELINA … 1 [FEM…    53 3 [> 3… 1 [URO… 1 [ESCH… 1 [GRAM …
## 5     5 2022-02-22 ALEJANDRA R… 1 [FEM…    45 2 [20-… 1 [URO… 1 [ESCH… 1 [GRAM …
## 6     6 2022-02-22 MARIA RAMIR… 1 [FEM…    33 2 [20-… 1 [URO… 6 [S. A… 2 [GRAM …
## # ℹ 26 more variables: LEUCOCITOS <dbl+lbl>, ERITROCITOS <dbl+lbl>, PH <dbl>,
## #   NITRITOS <dbl+lbl>, PROTEINAS <dbl+lbl>, GLUCOSA <dbl+lbl>,
## #   SANGRE <dbl+lbl>, AK <dbl+lbl>, AM <dbl+lbl>, CB <dbl+lbl>, CF <dbl+lbl>,
## #   CFX <dbl+lbl>, CPF <dbl+lbl>, CL <dbl+lbl>, GENT <dbl+lbl>, NET <dbl+lbl>,
## #   NF <dbl+lbl>, NOF <dbl+lbl>, SXT <dbl+lbl>, CLM <dbl+lbl>, DC <dbl+lbl>,
## #   E <dbl+lbl>, PE <dbl+lbl>, TE <dbl+lbl>, VA <dbl+lbl>,
## #   RESISTENCIA <dbl+lbl>

# Recodificar los valores de la variable1 según un diccionario
db$BACTERIA = as.numeric(db$BACTERIA)

db <- db %>%
  mutate(BACTERIA = recode(BACTERIA, `1` = "Escherichia coli", `2` = "Pseudomona aeruginosa", `3` = "Shigella", `4` ="klebsiella pneumoniae" , `5` = "klebsiella pneumoniae", `6`= "Staphylococcus aureus", `7` = "Staphylococcus epidermidis", `8` ="Enterobacter", `9`= "Yeast", `10`= "Proteus")) 


str(db$BACTERIA)

##  chr [1:197] "Escherichia coli" "klebsiella pneumoniae" "Yeast" ...

library(gtsummary)

## Warning: package 'gtsummary' was built under R version 4.2.3

db %>% select(BACTERIA) %>%  tbl_summary()

Characteristic	N = 197¹
BACTERIA
Enterobacter	1 (0.5%)
Escherichia coli	128 (65%)
klebsiella pneumoniae	6 (3.0%)
Proteus	1 (0.5%)
Pseudomona aeruginosa	6 (3.0%)
Staphylococcus aureus	22 (11%)
Staphylococcus epidermidis	28 (14%)
Yeast	5 (2.5%)
¹ n (%)

dbresis <- db %>% select(BACTERIA, AK:VA)

dbresis<-dbresis %>% mutate_at(vars(AK:VA), as.character) 

dbresisten<- dbresis %>%
  mutate_at(vars(AK:VA), ~recode(.,  `1` = "S", `3` = "R", `NA`= "")) 

datos <- dbresisten %>% gather(key= "farmaco", value= "resultado", -BACTERIA)

datos %>% select(BACTERIA, farmaco, resultado
                 ) %>% mutate (farmaco=paste("Farmaco", farmaco)) %>% 
  tbl_strata(strata = farmaco,
    .tbl_fun =
      ~ .x %>%
        tbl_summary(by = resultado, percent = 'row') %>%
        add_n(), 
    .header = "**{strata}**, N = {n}"
  )

## 79 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.

## 54 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.

## 79 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.

## 54 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 54 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.

## 79 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.

## 172 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.

## 54 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.

## 172 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 172 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.

## 54 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.

## 79 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 79 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 79 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.

## 172 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.

## 54 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.

## 172 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 172 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.

Characteristic	Farmaco AK, N = 197			Farmaco AM, N = 197			Farmaco CB, N = 197			Farmaco CF, N = 197			Farmaco CFX, N = 197			Farmaco CL, N = 197			Farmaco CLM, N = 197			Farmaco CPF, N = 197			Farmaco DC, N = 197			Farmaco E, N = 197			Farmaco GENT, N = 197			Farmaco NET, N = 197			Farmaco NF, N = 197			Farmaco NOF, N = 197			Farmaco PE, N = 197			Farmaco SXT, N = 197			Farmaco TE, N = 197			Farmaco VA, N = 197
Characteristic	N	R, N = 13¹	S, N = 105¹	N	R, N = 130¹	S, N = 13¹	N	R, N = 79¹	S, N = 39¹	N	R, N = 77¹	S, N = 66¹	N	R, N = 38¹	S, N = 105¹	N	R, N = 21¹	S, N = 97¹	N	R, N = 5¹	S, N = 20¹	N	R, N = 52¹	S, N = 91¹	N	R, N = 24¹	S, N = 1¹	N	R, N = 11¹	S, N = 14¹	N	R, N = 40¹	S, N = 103¹	N	R, N = 17¹	S, N = 101¹	N	R, N = 12¹	S, N = 106¹	N	R, N = 48¹	S, N = 70¹	N	R, N = 24¹	S, N = 1¹	N	R, N = 65¹	S, N = 78¹	N	R, N = 2¹	S, N = 23¹	N	R, N = 3¹	S, N = 22¹
BACTERIA	118			143			118			143			143			118			25			143			25			25			143			118			118			118			25			143			25			25
Escherichia coli		11 (10%)	95 (90%)		95 (89%)	12 (11%)		71 (67%)	35 (33%)		65 (61%)	42 (39%)		34 (32%)	73 (68%)		17 (16%)	89 (84%)		1 (100%)	0 (0%)		46 (43%)	61 (57%)		1 (100%)	0 (0%)		1 (100%)	0 (0%)		33 (31%)	74 (69%)		14 (13%)	92 (87%)		9 (8.5%)	97 (92%)		43 (41%)	63 (59%)		1 (100%)	0 (0%)		52 (49%)	55 (51%)		0 (0%)	1 (100%)		0 (0%)	1 (100%)
klebsiella pneumoniae		0 (0%)	5 (100%)		5 (100%)	0 (0%)		2 (40%)	3 (60%)		2 (40%)	3 (60%)		1 (20%)	4 (80%)		0 (0%)	5 (100%)					1 (20%)	4 (80%)								0 (0%)	5 (100%)		0 (0%)	5 (100%)		0 (0%)	5 (100%)		2 (40%)	3 (60%)					2 (40%)	3 (60%)
Proteus		1 (100%)	0 (0%)		1 (100%)	0 (0%)		1 (100%)	0 (0%)		1 (100%)	0 (0%)		0 (0%)	1 (100%)		0 (0%)	1 (100%)					1 (100%)	0 (0%)								1 (100%)	0 (0%)		0 (0%)	1 (100%)		0 (0%)	1 (100%)		0 (0%)	1 (100%)					1 (100%)	0 (0%)
Pseudomona aeruginosa		1 (20%)	4 (80%)		5 (100%)	0 (0%)		4 (80%)	1 (20%)		5 (100%)	0 (0%)		2 (40%)	3 (60%)		4 (80%)	1 (20%)					2 (40%)	3 (60%)								2 (40%)	3 (60%)		2 (40%)	3 (60%)		3 (60%)	2 (40%)		2 (40%)	3 (60%)					5 (100%)	0 (0%)
Staphylococcus epidermidis		0 (0%)	1 (100%)		15 (94%)	1 (6.2%)		1 (100%)	0 (0%)		2 (12%)	14 (88%)		1 (6.2%)	15 (94%)		0 (0%)	1 (100%)		1 (6.7%)	14 (93%)		2 (12%)	14 (88%)		14 (93%)	1 (6.7%)		6 (40%)	9 (60%)		3 (19%)	13 (81%)		1 (100%)	0 (0%)		0 (0%)	1 (100%)		1 (100%)	0 (0%)		14 (93%)	1 (6.7%)		4 (25%)	12 (75%)		2 (13%)	13 (87%)		0 (0%)	15 (100%)
Enterobacter					1 (100%)	0 (0%)					1 (100%)	0 (0%)		0 (0%)	1 (100%)					1 (100%)	0 (0%)		0 (0%)	1 (100%)		1 (100%)	0 (0%)		1 (100%)	0 (0%)		0 (0%)	1 (100%)											1 (100%)	0 (0%)		0 (0%)	1 (100%)		0 (0%)	1 (100%)		1 (100%)	0 (0%)
Staphylococcus aureus					8 (100%)	0 (0%)					1 (12%)	7 (88%)		0 (0%)	8 (100%)					2 (25%)	6 (75%)		0 (0%)	8 (100%)		8 (100%)	0 (0%)		3 (38%)	5 (62%)		1 (12%)	7 (88%)											8 (100%)	0 (0%)		1 (12%)	7 (88%)		0 (0%)	8 (100%)		2 (25%)	6 (75%)
¹ n (%)

# Continue from your code...
datos <- dbresisten %>% 
    gather(key= "farmaco", value= "resultado", -BACTERIA) %>% 
    mutate(farmaco=paste("Farmaco", farmaco))

# Convert 'resultado' to factor for better handling in ggplot
datos$resultado <- as.factor(datos$resultado)

# Create a table with counts of each combination of 'BACTERIA', 'farmaco', and 'resultado'
heatmap_data <- datos %>% 
    group_by(BACTERIA, farmaco, resultado) %>% 
    summarise(count=n()) %>% 
    ungroup()

## `summarise()` has grouped output by 'BACTERIA', 'farmaco'. You can override
## using the `.groups` argument.

# Generate proportions by dividing count by sum of counts for each 'BACTERIA'-'farmaco' combination
heatmap_data <- heatmap_data %>%
    group_by(BACTERIA, farmaco) %>%
    mutate(proportion = count / sum(count)) %>%
    ungroup()


# Filter out blank values
heatmap_data <- heatmap_data %>% filter(resultado != "")

# Plot the heatmap
ggplot(heatmap_data, aes(x=resultado, y=BACTERIA, fill=proportion)) +
    geom_tile() +
    scale_fill_gradient(low = "white", high = "red") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(x="", y="", fill="Proportion") +
    facet_wrap(~farmaco)+theme_test()+
  theme(legend.position='right',text = element_text(size = 12, face="bold"), axis.text.x = element_text(size=10, colour = "black"),axis.text.y = element_text(size=10, colour = "black"),
        legend.text = element_text(size = 12, color = "black"),strip.text.x = element_text(size = 12,face="bold"))

ggplot(heatmap_data, aes(x=resultado, y=farmaco, fill=proportion)) +
    geom_tile() +
    scale_fill_gradient(low = "white", high = "red") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(x="", y="", fill="Proportion") +
    facet_wrap(~BACTERIA)+theme_test()+
  theme(legend.position='right',text = element_text(size = 12, face="bold"), axis.text.x = element_text(size=10, colour = "black"),axis.text.y = element_text(size=10, colour = "black"),
        legend.text = element_text(size = 12, color = "black"),strip.text.x = element_text(size = 12,face="bold"))

library(ggpubr)
datosplot <- datos %>% drop_na() 

ggplot(datosplot, aes(x = BACTERIA, fill = resultado)) +
  stat_count(geom = "bar",
  position = "fill",stat="identity")+
  facet_wrap(. ~ farmaco)+theme_pubr()+
  coord_flip() + theme(legend.text = element_text(size = 8, color = "black"))

## Warning in stat_count(geom = "bar", position = "fill", stat = "identity"):
## Ignoring unknown parameters: `stat`

 #+ theme(text = element_text(size = 5))+theme(axis.text.x = element_text(angle = 90))+theme(axis.text.x = element_text(hjust = 1, vjust = .5))+ scale_fill_brewer(palette="Pastel1")+ylab("")+xlab("")+theme(panel.spacing.x = unit(1, "lines"))+scale_x_discrete(limits = rev)+ scale_y_continuous(expand=expansion(mult=c(0,0.01)))+

datosplot %>% select(BACTERIA, farmaco, resultado
                 ) %>% mutate (farmaco=paste("Farmaco", farmaco)) %>% 
  tbl_strata(strata = farmaco,
    .tbl_fun =
      ~ .x %>%
        tbl_summary(by = resultado, percent = 'row') %>%
        add_n(), 
    .header = "**{strata}**, N = {n}"
  )

Characteristic	Farmaco Farmaco AK, N = 118			Farmaco Farmaco AM, N = 143			Farmaco Farmaco CB, N = 118			Farmaco Farmaco CF, N = 143			Farmaco Farmaco CFX, N = 143			Farmaco Farmaco CL, N = 118			Farmaco Farmaco CLM, N = 25			Farmaco Farmaco CPF, N = 143			Farmaco Farmaco DC, N = 25			Farmaco Farmaco E, N = 25			Farmaco Farmaco GENT, N = 143			Farmaco Farmaco NET, N = 118			Farmaco Farmaco NF, N = 118			Farmaco Farmaco NOF, N = 118			Farmaco Farmaco PE, N = 25			Farmaco Farmaco SXT, N = 143			Farmaco Farmaco TE, N = 25			Farmaco Farmaco VA, N = 25
Characteristic	N	R, N = 13¹	S, N = 105¹	N	R, N = 130¹	S, N = 13¹	N	R, N = 79¹	S, N = 39¹	N	R, N = 77¹	S, N = 66¹	N	R, N = 38¹	S, N = 105¹	N	R, N = 21¹	S, N = 97¹	N	R, N = 5¹	S, N = 20¹	N	R, N = 52¹	S, N = 91¹	N	R, N = 24¹	S, N = 1¹	N	R, N = 11¹	S, N = 14¹	N	R, N = 40¹	S, N = 103¹	N	R, N = 17¹	S, N = 101¹	N	R, N = 12¹	S, N = 106¹	N	R, N = 48¹	S, N = 70¹	N	R, N = 24¹	S, N = 1¹	N	R, N = 65¹	S, N = 78¹	N	R, N = 2¹	S, N = 23¹	N	R, N = 3¹	S, N = 22¹
BACTERIA	118			143			118			143			143			118			25			143			25			25			143			118			118			118			25			143			25			25
Escherichia coli		11 (10%)	95 (90%)		95 (89%)	12 (11%)		71 (67%)	35 (33%)		65 (61%)	42 (39%)		34 (32%)	73 (68%)		17 (16%)	89 (84%)		1 (100%)	0 (0%)		46 (43%)	61 (57%)		1 (100%)	0 (0%)		1 (100%)	0 (0%)		33 (31%)	74 (69%)		14 (13%)	92 (87%)		9 (8.5%)	97 (92%)		43 (41%)	63 (59%)		1 (100%)	0 (0%)		52 (49%)	55 (51%)		0 (0%)	1 (100%)		0 (0%)	1 (100%)
klebsiella pneumoniae		0 (0%)	5 (100%)		5 (100%)	0 (0%)		2 (40%)	3 (60%)		2 (40%)	3 (60%)		1 (20%)	4 (80%)		0 (0%)	5 (100%)					1 (20%)	4 (80%)								0 (0%)	5 (100%)		0 (0%)	5 (100%)		0 (0%)	5 (100%)		2 (40%)	3 (60%)					2 (40%)	3 (60%)
Proteus		1 (100%)	0 (0%)		1 (100%)	0 (0%)		1 (100%)	0 (0%)		1 (100%)	0 (0%)		0 (0%)	1 (100%)		0 (0%)	1 (100%)					1 (100%)	0 (0%)								1 (100%)	0 (0%)		0 (0%)	1 (100%)		0 (0%)	1 (100%)		0 (0%)	1 (100%)					1 (100%)	0 (0%)
Pseudomona aeruginosa		1 (20%)	4 (80%)		5 (100%)	0 (0%)		4 (80%)	1 (20%)		5 (100%)	0 (0%)		2 (40%)	3 (60%)		4 (80%)	1 (20%)					2 (40%)	3 (60%)								2 (40%)	3 (60%)		2 (40%)	3 (60%)		3 (60%)	2 (40%)		2 (40%)	3 (60%)					5 (100%)	0 (0%)
Staphylococcus epidermidis		0 (0%)	1 (100%)		15 (94%)	1 (6.2%)		1 (100%)	0 (0%)		2 (12%)	14 (88%)		1 (6.2%)	15 (94%)		0 (0%)	1 (100%)		1 (6.7%)	14 (93%)		2 (12%)	14 (88%)		14 (93%)	1 (6.7%)		6 (40%)	9 (60%)		3 (19%)	13 (81%)		1 (100%)	0 (0%)		0 (0%)	1 (100%)		1 (100%)	0 (0%)		14 (93%)	1 (6.7%)		4 (25%)	12 (75%)		2 (13%)	13 (87%)		0 (0%)	15 (100%)
Enterobacter					1 (100%)	0 (0%)					1 (100%)	0 (0%)		0 (0%)	1 (100%)					1 (100%)	0 (0%)		0 (0%)	1 (100%)		1 (100%)	0 (0%)		1 (100%)	0 (0%)		0 (0%)	1 (100%)											1 (100%)	0 (0%)		0 (0%)	1 (100%)		0 (0%)	1 (100%)		1 (100%)	0 (0%)
Staphylococcus aureus					8 (100%)	0 (0%)					1 (12%)	7 (88%)		0 (0%)	8 (100%)					2 (25%)	6 (75%)		0 (0%)	8 (100%)		8 (100%)	0 (0%)		3 (38%)	5 (62%)		1 (12%)	7 (88%)											8 (100%)	0 (0%)		1 (12%)	7 (88%)		0 (0%)	8 (100%)		2 (25%)	6 (75%)
¹ n (%)

library(AMR)

## Warning: package 'AMR' was built under R version 4.2.3

dbresistence <- dbresisten


#CLEAN DATA with AMR

#Now we can thus clean our data:

dbresistence$BACTERIA <- as.mo(dbresistence$BACTERIA, info = TRUE)

## ℹ Microorganism translation was uncertain for "Pseudomona aeruginosa"
##   (assumed Pseudomonas aeruginosa). Run mo_uncertainties() to review this,
##   or use add_custom_microorganisms() to add custom entries.

mo_uncertainties()

## Matching scores are based on the resemblance between the input and the full
## taxonomic name, and the pathogenicity in humans. See ?mo_matching_score.
## 
## --------------------------------------------------------------------------------
## "Pseudomona aeruginosa" -> Pseudomonas aeruginosa (B_PSDMN_AERG, 0.977)
## Also matched: Pseudomonas andropogonis (0.708), Pseudomonas
##               antimicrobica (0.680), Pseudomonas alcaligenes (0.674), Pseudomonas
##               acidovorans (0.630), Pseudomonas asiatica (0.500), Pseudomonas
##               azerbaijanoriens (0.500), Pseudomonas azerbaijanorientalis (0.490),
##               Pseudomonas azerbaijanoccidens (0.489), Pseudomonas
##               azerbaijanoccidentalis (0.480), and Pseudomonas argentinensis (0.480)
## 
## Only the first 10 other matches of each record are shown. Run
## print(mo_uncertainties(), n = ...) to view more entries, or save
## mo_uncertainties() to an object.

library(gt)
bact<-dbresistence %>%
  count(mo_name(BACTERIA), sort = TRUE)

bact %>% gt()

mo_name(BACTERIA)	n
Escherichia coli	128
Staphylococcus epidermidis	28
Staphylococcus aureus	22
Klebsiella pneumoniae	6
Pseudomonas aeruginosa	6
(levadura desconocida)	5
Enterobacter	1
Proteus	1

#Antibiotic results
# method 1, be explicit about the columns:
#dbresistence <- dbresistence %>% mutate_at(vars(AK:VA), as.sir)

#method 2:
dbresistence <- dbresistence %>%
  mutate_if(is_sir_eligible, as.sir)

summary(dbresistence)

##    BACTERIA                AK                    AM               
##  Class :mo             Class:sir             Class:sir            
##  <NA>  :0              %R   :11.0% (n=13)    %R   :90.9% (n=130)  
##  Unique:8              %SI  :89.0% (n=105)   %SI  : 9.1% (n=13)   
##  #1    :B_ESCHR_COLI   - %S :89.0% (n=105)   - %S : 9.1% (n=13)   
##  #2    :B_STPHY_EPDR   - %I : 0.0% (n=0)     - %I : 0.0% (n=0)    
##  #3    :B_STPHY_AURS                                              
##      CB                   CF                  CFX               
##  Class:sir            Class:sir            Class:sir            
##  %R   :66.9% (n=79)   %R   :53.8% (n=77)   %R   :26.6% (n=38)   
##  %SI  :33.1% (n=39)   %SI  :46.2% (n=66)   %SI  :73.4% (n=105)  
##  - %S :33.1% (n=39)   - %S :46.2% (n=66)   - %S :73.4% (n=105)  
##  - %I : 0.0% (n=0)    - %I : 0.0% (n=0)    - %I : 0.0% (n=0)    
##                                                                 
##     CPF                   CL                  GENT              
##  Class:sir            Class:sir            Class:sir            
##  %R   :36.4% (n=52)   %R   :17.8% (n=21)   %R   :28.0% (n=40)   
##  %SI  :63.6% (n=91)   %SI  :82.2% (n=97)   %SI  :72.0% (n=103)  
##  - %S :63.6% (n=91)   - %S :82.2% (n=97)   - %S :72.0% (n=103)  
##  - %I : 0.0% (n=0)    - %I : 0.0% (n=0)    - %I : 0.0% (n=0)    
##                                                                 
##     NET                    NF                   NOF              
##  Class:sir             Class:sir             Class:sir           
##  %R   :14.4% (n=17)    %R   :10.2% (n=12)    %R   :40.7% (n=48)  
##  %SI  :85.6% (n=101)   %SI  :89.8% (n=106)   %SI  :59.3% (n=70)  
##  - %S :85.6% (n=101)   - %S :89.8% (n=106)   - %S :59.3% (n=70)  
##  - %I : 0.0% (n=0)     - %I : 0.0% (n=0)     - %I : 0.0% (n=0)   
##                                                                  
##     SXT                  CLM                   DC              
##  Class:sir            Class:sir            Class:sir           
##  %R   :45.5% (n=65)   %R   :20.0% (n=5)    %R   :96.0% (n=24)  
##  %SI  :54.5% (n=78)   %SI  :80.0% (n=20)   %SI  : 4.0% (n=1)   
##  - %S :54.5% (n=78)   - %S :80.0% (n=20)   - %S : 4.0% (n=1)   
##  - %I : 0.0% (n=0)    - %I : 0.0% (n=0)    - %I : 0.0% (n=0)   
##                                                                
##      E                    PE                   TE              
##  Class:sir            Class:sir            Class:sir           
##  %R   :44.0% (n=11)   %R   :96.0% (n=24)   %R   : 8.0% (n=2)   
##  %SI  :56.0% (n=14)   %SI  : 4.0% (n=1)    %SI  :92.0% (n=23)  
##  - %S :56.0% (n=14)   - %S : 4.0% (n=1)    - %S :92.0% (n=23)  
##  - %I : 0.0% (n=0)    - %I : 0.0% (n=0)    - %I : 0.0% (n=0)   
##                                                                
##      VA              
##  Class:sir           
##  %R   :12.0% (n=3)   
##  %SI  :88.0% (n=22)  
##  - %S :88.0% (n=22)  
##  - %I : 0.0% (n=0)   
##

dbresisten<-dbresisten %>%
  mutate_if(is_sir_eligible, as.sir)

    ggplot(dbresisten) +
    geom_sir() +
    scale_y_percent() +
    scale_sir_colours() +
    labels_sir_count() +
    theme_sir()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))

## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "CPF".

## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "NF".

## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "NOF".

## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "DC".

## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "E".

## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "PE".

## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "CPF".

## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "NF".

## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "NOF".

## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "DC".

## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "E".

## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "PE".

## Warning: Removed 12 rows containing missing values (`position_stack()`).

## Warning: Removed 18 rows containing missing values (`position_stack()`).

ab<-antibiogram(dbresisten,)


??antibiogram

## starting httpd help server ... done

ab %>% gt()

Patógeno (N min-max)	AK	AM	CB	CF	CFX	CL	CPF	GENT	NET	NF	NOF	SXT
E. coli (1-107)	90	11	33	39	68	84	57	69	87	92	59	51

 dbresisten %>% group_by(mo_name(BACTERIA)) %>%  summarise(amoxicillin = resistance(AM))

## Warning: There were 7 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `amoxicillin = resistance(AM)`.
## ℹ In group 1: `mo_name(BACTERIA) = "(levadura desconocida)"`.
## Caused by warning:
## ! Introducing NA: no results available for AM in group: mo_name(BACTERIA) =
## "(levadura desconocida)" (minimum = 30).
## ℹ Run ]8;;ide:run:dplyr::last_dplyr_warnings()dplyr::last_dplyr_warnings()]8;; to see the 6 remaining warnings.

## # A tibble: 8 × 2
##   `mo_name(BACTERIA)`        amoxicillin
##   <chr>                            <dbl>
## 1 (levadura desconocida)          NA    
## 2 Enterobacter                    NA    
## 3 Escherichia coli                 0.888
## 4 Klebsiella pneumoniae           NA    
## 5 Proteus                         NA    
## 6 Pseudomonas aeruginosa          NA    
## 7 Staphylococcus aureus           NA    
## 8 Staphylococcus epidermidis      NA

# Continue from your code...
datos <- dbresisten %>% 
    gather(key= "farmaco", value= "resultado", -BACTERIA) 

# Convert 'resultado' to factor for better handling in ggplot
datos$resultado <- as.factor(datos$resultado)

# Create a table with counts of each combination of 'BACTERIA', 'farmaco', and 'resultado'
heatmap_data <- datos %>% 
    group_by(BACTERIA, farmaco, resultado) %>% 
    summarise(count=n()) %>% 
    ungroup()

## `summarise()` has grouped output by 'BACTERIA', 'farmaco'. You can override
## using the `.groups` argument.

# Generate proportions by dividing count by sum of counts for each 'BACTERIA'-'farmaco' combination
heatmap_data <- heatmap_data %>%
    group_by(BACTERIA, farmaco) %>%
    mutate(proportion = count / sum(count)) %>%
    ungroup()


# Filter out blank values
heatmap_data <- heatmap_data %>% filter(resultado != "")

# Plot the heatmap
ggplot(heatmap_data, aes(x=resultado, y=BACTERIA, fill=proportion)) +
    geom_tile() +
    scale_fill_gradient(low = "white", high = "red") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(x="", y="", fill="Proportion") +
    facet_wrap(~farmaco)+theme_test()+theme(legend.position='right',text = element_text(size = 12, face="bold"), axis.text.x = element_text(size=10, colour = "black"),axis.text.y = element_text(size=10, colour = "black"),
        legend.text = element_text(size = 12, color = "black"),strip.text.x = element_text(size = 12,face="bold"))

ggplot(heatmap_data, aes(x=resultado, y=farmaco, fill=proportion)) +
    geom_tile() +
    scale_fill_gradient(low = "#C0A635", high = "#033195") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(x="", y="", fill="Proportion") +
    facet_wrap(~BACTERIA)+theme_test()+
  theme(legend.position='right',text = element_text(size = 12, face="bold"), axis.text.x = element_text(size=16, colour = "black"),axis.text.y = element_text(size=10, colour = "black"),
        legend.text = element_text(size = 12, color = "black"),strip.text.x = element_text(size = 12,face="bold"))

library(ggpubr)
datosplot <- datos %>% drop_na() 

ggplot(datosplot, aes(x = BACTERIA, fill = resultado)) + 
  stat_count(geom = "bar",
  position = "fill",stat="identity",color="black")+
  facet_wrap(. ~ farmaco)+theme_classic2()+
  coord_flip() + theme(legend.text = element_text(size = 8, color = "black"))+ scale_fill_manual(values = alpha(c( "#C0A635" ,"#033195"), .75))+
  theme(legend.position='right',text = element_text(size = 12, face="bold"), axis.text.x = element_text(size=10, colour = "black"),axis.text.y = element_text(size=10, colour = "black"),
        legend.text = element_text(size = 12, color = "black"),strip.text.x = element_text(size = 12,face="bold"))

## Warning in stat_count(geom = "bar", position = "fill", stat = "identity", :
## Ignoring unknown parameters: `stat`

 #+ theme(text = element_text(size = 5))+theme(axis.text.x = element_text(angle = 90))+theme(axis.text.x = element_text(hjust = 1, vjust = .5))+ scale_fill_brewer(palette="Pastel1")+ylab("")+xlab("")+theme(panel.spacing.x = unit(1, "lines"))+scale_x_discrete(limits = rev)+ scale_y_continuous(expand=expansion(mult=c(0,0.01)))+

dbresisclin <- db %>% select(EDAD, BACTERIA, AK:VA,)  

dbresisclin<-dbresisclin %>% mutate_at(vars(AK:VA), as.character) 

dbresistencin<- dbresisclin %>%
  mutate_at(vars(AK:VA), ~recode(.,  `1` = "S", `3` = "R", `NA`= ""))


# Gather data to long format
long_df <- dbresistencin %>% 
  gather(key = "farmaco", value = "resultado", -EDAD, -BACTERIA)

# View the resulting long dataframe
head(long_df)

## # A tibble: 6 × 4
##    EDAD BACTERIA              farmaco resultado
##   <dbl> <chr>                 <chr>   <chr>    
## 1    49 Escherichia coli      AK      S        
## 2    34 klebsiella pneumoniae AK      <NA>     
## 3    33 Yeast                 AK      <NA>     
## 4    53 Escherichia coli      AK      S        
## 5    45 Escherichia coli      AK      S        
## 6    33 Staphylococcus aureus AK      <NA>

long_df<-long_df %>% drop_na()

long_df %>% tbl_summary(by=resultado) %>% add_p()

## There was an error in 'add_p()/add_difference()' for variable 'BACTERIA', p-value omitted:
## Error in stats::fisher.test(c("Escherichia coli", "Escherichia coli", : FEXACT error 7(location). LDSTP=18360 is too small for this problem,
##   (pastp=69.7532, ipn_0:=ipoin[itp=564]=8010, stp[ipn_0]=69.4681).
## Increase workspace or consider using 'simulate.p.value=TRUE'

Characteristic	R, N = 634¹	S, N = 1,034¹	p-value²
EDAD DEL PACIENTE	38 (26, 53)	37 (23, 52)	0.029
BACTERIA
Enterobacter	7 (1.1%)	5 (0.5%)
Escherichia coli	477 (75%)	783 (76%)
klebsiella pneumoniae	15 (2.4%)	45 (4.4%)
Proteus	7 (1.1%)	5 (0.5%)
Pseudomona aeruginosa	37 (5.8%)	23 (2.2%)
Staphylococcus aureus	31 (4.9%)	53 (5.1%)
Staphylococcus epidermidis	60 (9.5%)	120 (12%)
farmaco			<0.001
AK	13 (2.1%)	103 (10.0%)
AM	126 (20%)	13 (1.3%)
CB	77 (12%)	39 (3.8%)
CF	75 (12%)	64 (6.2%)
CFX	36 (5.7%)	103 (10.0%)
CL	20 (3.2%)	96 (9.3%)
CLM	4 (0.6%)	19 (1.8%)
CPF	50 (7.9%)	89 (8.6%)
DC	22 (3.5%)	1 (<0.1%)
E	10 (1.6%)	13 (1.3%)
GENT	39 (6.2%)	100 (9.7%)
NET	15 (2.4%)	101 (9.8%)
NF	12 (1.9%)	104 (10%)
NOF	46 (7.3%)	70 (6.8%)
PE	22 (3.5%)	1 (<0.1%)
SXT	62 (9.8%)	77 (7.4%)
TE	2 (0.3%)	21 (2.0%)
VA	3 (0.5%)	20 (1.9%)
¹ Median (IQR); n (%)
² Wilcoxon rank sum test; Pearson's Chi-squared test

# Load necessary libraries
library(dplyr)
library(ggplot2)
library(tidyr)
library(purrr)
library(broom)

## Warning: package 'broom' was built under R version 4.2.3

# Your data preparation process here
dbresisclin <- db %>% select(EDAD, BACTERIA, AK:VA)
dbresisclin <- dbresisclin %>% mutate_at(vars(AK:VA), as.character) 

dbresistencin <- dbresisclin %>%
  mutate_at(vars(AK:VA), ~recode(.,  `1` = "S", `3` = "R", `NA`= ""))

# Gather data to long format
long_df <- dbresistencin %>% 
  gather(key = "farmaco", value = "resultado", -EDAD, -BACTERIA)

# Filter out blank values
long_df <- long_df %>% filter(resultado != "")

# Calculate the count for each bacteria and result
df_counts <- long_df %>%
  group_by(BACTERIA, resultado) %>%
  summarise(n = n(), .groups = "drop")

# Calculate the percentage for each bacteria and result
df_counts <- df_counts %>%
  group_by(BACTERIA) %>%
  mutate(percent = n / sum(n) * 100)

# Run chi square test for each bacteria and add p-value to data frame
df_counts <- df_counts %>%
  nest(data = -BACTERIA) %>%
  mutate(chi_test = map(data, ~ chisq.test(.x$n)$p.value)) %>%
  unnest(data, chi_test)

## Warning: `unnest()` has a new interface. See `?unnest` for details.
## ℹ Try `df %>% unnest(c(data, chi_test))`, with `mutate()` if needed.

# Add significance labels
df_counts <- df_counts %>%
  mutate(signif_label = case_when(
    chi_test > 0.05 ~ "NS",
    chi_test <= 0.001 ~ "<0.001",
    chi_test >= 0.004 ~ "0.004",
    TRUE ~ as.character(chi_test)
  ))

# Create the bar plot
ggplot(df_counts, aes(x = resultado, y = percent, fill = resultado)) +
  geom_bar(stat = "identity") +
  facet_wrap(~BACTERIA, scales = "free_y") +
  geom_text(aes(label = paste0("p = ", signif_label)), vjust = -1) +
  labs(title = "Bacteria Resistance and Sensitivity", 
       x = "Result", 
       y = "Percentage", 
       fill = "")+theme_sir()

# Load necessary libraries
library(dplyr)
library(ggplot2)
library(tidyr)

# Your data preparation process here
dbresisclin <- db %>% select(EDAD, BACTERIA, AK:VA)
dbresisclin <- dbresisclin %>% mutate_at(vars(AK:VA), as.character) 

dbresistencin <- dbresisclin %>%
  mutate_at(vars(AK:VA), ~recode(.,  `1` = "S", `3` = "R", `NA`= ""))

# Gather data to long format
long_df <- dbresistencin %>% 
  gather(key = "farmaco", value = "resultado", -EDAD, -BACTERIA)

# Filter out blank values and select only 'R' results
long_df <- long_df %>% filter(resultado == "R")

# Calculate the count for each bacteria and result
df_counts <- long_df %>%
  group_by(BACTERIA, farmaco) %>%
  summarise(n = n(), .groups = "drop") 

# Calculate the total count for each bacteria
df_totals <- long_df %>%
  group_by(BACTERIA) %>%
  summarise(total = n(), .groups = "drop")

# Join the data frames and calculate proportions and confidence intervals
df_final <- inner_join(df_counts, df_totals, by = "BACTERIA") %>%
  mutate(proportion = n / total,
         conf_low = proportion - 1.96 * sqrt((proportion * (1 - proportion)) / total),
         conf_high = proportion + 1.96 * sqrt((proportion * (1 - proportion)) / total))


# Create the bar plot
ggplot(df_final, aes(x = farmaco, y = proportion, fill = BACTERIA)) +
  geom_bar(stat = "identity", position = position_dodge())  +
  labs(title = "Bacteria Resistance Frequency", 
       x = "Drug", 
       y = "Resistance Frequency", 
       fill = "Bacteria")

#filter enterobacter
filtentero <- df_final %>% filter(BACTERIA == "Enterobacter")
enterores<-ggplot(filtentero, aes(x = farmaco, y = proportion, fill = BACTERIA))+
  geom_errorbar(aes(ymin = conf_low, ymax = conf_high), width = 0.2, position = position_dodge(0.9))+
  geom_bar(stat = "identity", position = position_dodge())  +
  labs(title = "", 
       x = "Drug", 
       y = "Resistance Frequency", 
       fill = "")
enterores

#filter e colI
filtecoli<-df_final %>% filter(BACTERIA == "Escherichia coli")
ecolires<-ggplot(filtecoli, aes(x = farmaco, y = proportion, fill = BACTERIA))+
  geom_errorbar(aes(ymin = conf_low, ymax = conf_high), width = 0.2, position = position_dodge(0.9))+
  geom_bar(stat = "identity", position = position_dodge())  +
  labs(title = "", 
       x = "Drug", 
       y = "Resistance Frequency", 
       fill = "")
ecolires

#Pseudomona aeruginosa
filtepseudo<-df_final %>% filter(BACTERIA == "Pseudomona aeruginosa")
pseudores<-ggplot(filtepseudo, aes(x = farmaco, y = proportion, fill = BACTERIA))+
  geom_errorbar(aes(ymin = conf_low, ymax = conf_high), width = 0.2, position = position_dodge(0.9))+
  geom_bar(stat = "identity", position = position_dodge())  +
  labs(title = "", 
       x = "Drug", 
       y = "Resistance Frequency", 
       fill = "")
pseudores

# Load AMR package
library(AMR)
# Calculate multidrug resistance
mdr_results <- dbresistencin %>%
  select(BACTERIA, AK:VA) %>%
  mutate(across(AK:VA, as.sir)) %>%
  mutate(mdr = mdro(.))

##  (54 isolates had no test results)

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `mdr = mdro(.)`.
## Caused by warning:
## ! in mdro(): NA introduced for isolates where the available percentage of
## antimicrobial classes was below 50% (set with pct_required_classes)

# View the results
head(mdr_results)

## # A tibble: 6 × 20
##   BACTERIA     AK    AM    CB    CF    CFX   CPF   CL    GENT  NET   NF    NOF  
##   <chr>        <sir> <sir> <sir> <sir> <sir> <sir> <sir> <sir> <sir> <sir> <sir>
## 1 Escherichia… S     R     S     R     S     R     S     R     S     R     S    
## 2 klebsiella … NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## 3 Yeast        NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## 4 Escherichia… S     R     R     R     S     R     R     S     S     S     S    
## 5 Escherichia… S     R     R     R     S     S     S     S     R     S     R    
## 6 Staphylococ… NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## # ℹ 8 more variables: SXT <sir>, CLM <sir>, DC <sir>, E <sir>, PE <sir>,
## #   TE <sir>, VA <sir>, mdr <ord>

# Chi-squared test
# Fisher's Exact Test
fisher_test <- fisher.test(table(mdr_results$BACTERIA, mdr_results$mdr))
print(fisher_test)

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mdr_results$BACTERIA, mdr_results$mdr)
## p-value = 9.115e-15
## alternative hypothesis: two.sided

fisher_test$p.value

## [1] 9.114706e-15

# Bar plot of multidrug resistance
ggplot(mdr_results, aes(x = BACTERIA, fill = mdr, na.rm = TRUE)) +
  geom_bar(position = "fill") +
  labs(x = "Bacteria", y = "Proportion", fill = "MDR Status") +
  scale_fill_discrete(labels = c("Non-MDR", "MDR")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Load necessary libraries
library(dplyr)

# Define start and end dates
start_date <- as.Date("2020-01-01")
end_date <- as.Date("2023-12-31")

# Add date column
dbresistencin <- dbresistencin %>%
  mutate(FECHA = sample(seq(start_date, end_date, by = "day"), size = nrow(dbresistencin), replace = TRUE))

# Load necessary libraries
library(ggplot2)
library(lubridate)

# Convert date to numeric
dbresistencin <- dbresistencin %>%
  mutate(FECHA = as.numeric(FECHA))

# Remove rows with NA in columns AK to VA
dbresistencin <- dbresistencin %>% drop_na(AK:VA)


# Fit linear regression model
#fit <- lm(AK ~ FECHA, data = dbresistencin, na.rm = TRUE)

# Make predictions
#dbresistencin$pred <- predict(fit, newdata = dbresistencin)

# Plot actual vs predicted
#ggplot(dbresistencin, aes(x = as.Date(FECHA), y = AK)) +
 # geom_point() +
  #geom_line(aes(y = pred), color = "red")

ARM

MDatos

2023-05-12