library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'purrr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.2.3
library(haven)
## Warning: package 'haven' was built under R version 4.2.3
DATOS_RESISTENCIA_BACTERIANA <- read_sav("C:/Users/fidel/OneDrive - UNIVERSIDAD AUTONOMA DE SINALOA/INVESTIGACIÓN DGPI/DATOS_RESISTENCIA_BACTERIANA.sav")
  db<-DATOS_RESISTENCIA_BACTERIANA

head(db)
## # A tibble: 6 × 35
##   FOLIO FECHA      NOMBRE       SEXO     EDAD EDAD1   CULTIVO BACTERIA BACTERIA1
##   <dbl> <date>     <chr>        <dbl+l> <dbl> <dbl+l> <dbl+l> <dbl+lb> <dbl+lbl>
## 1     1 2022-02-22 CLAUDIA MAB… 1 [FEM…    49 3 [> 3… 1 [URO… 1 [ESCH… 1 [GRAM …
## 2     2 2022-02-22 JOSE PATIÑO  2 [MAS…    34 2 [20-… 2 [EXU… 4 [KLEB… 1 [GRAM …
## 3     3 2022-02-22 ANA KAREN D… 1 [FEM…    33 2 [20-… 1 [URO… 9 [LEVA… 3 [LEVAD…
## 4     4 2022-02-22 EVANGELINA … 1 [FEM…    53 3 [> 3… 1 [URO… 1 [ESCH… 1 [GRAM …
## 5     5 2022-02-22 ALEJANDRA R… 1 [FEM…    45 2 [20-… 1 [URO… 1 [ESCH… 1 [GRAM …
## 6     6 2022-02-22 MARIA RAMIR… 1 [FEM…    33 2 [20-… 1 [URO… 6 [S. A… 2 [GRAM …
## # ℹ 26 more variables: LEUCOCITOS <dbl+lbl>, ERITROCITOS <dbl+lbl>, PH <dbl>,
## #   NITRITOS <dbl+lbl>, PROTEINAS <dbl+lbl>, GLUCOSA <dbl+lbl>,
## #   SANGRE <dbl+lbl>, AK <dbl+lbl>, AM <dbl+lbl>, CB <dbl+lbl>, CF <dbl+lbl>,
## #   CFX <dbl+lbl>, CPF <dbl+lbl>, CL <dbl+lbl>, GENT <dbl+lbl>, NET <dbl+lbl>,
## #   NF <dbl+lbl>, NOF <dbl+lbl>, SXT <dbl+lbl>, CLM <dbl+lbl>, DC <dbl+lbl>,
## #   E <dbl+lbl>, PE <dbl+lbl>, TE <dbl+lbl>, VA <dbl+lbl>,
## #   RESISTENCIA <dbl+lbl>
head(db)
## # A tibble: 6 × 35
##   FOLIO FECHA      NOMBRE       SEXO     EDAD EDAD1   CULTIVO BACTERIA BACTERIA1
##   <dbl> <date>     <chr>        <dbl+l> <dbl> <dbl+l> <dbl+l> <dbl+lb> <dbl+lbl>
## 1     1 2022-02-22 CLAUDIA MAB… 1 [FEM…    49 3 [> 3… 1 [URO… 1 [ESCH… 1 [GRAM …
## 2     2 2022-02-22 JOSE PATIÑO  2 [MAS…    34 2 [20-… 2 [EXU… 4 [KLEB… 1 [GRAM …
## 3     3 2022-02-22 ANA KAREN D… 1 [FEM…    33 2 [20-… 1 [URO… 9 [LEVA… 3 [LEVAD…
## 4     4 2022-02-22 EVANGELINA … 1 [FEM…    53 3 [> 3… 1 [URO… 1 [ESCH… 1 [GRAM …
## 5     5 2022-02-22 ALEJANDRA R… 1 [FEM…    45 2 [20-… 1 [URO… 1 [ESCH… 1 [GRAM …
## 6     6 2022-02-22 MARIA RAMIR… 1 [FEM…    33 2 [20-… 1 [URO… 6 [S. A… 2 [GRAM …
## # ℹ 26 more variables: LEUCOCITOS <dbl+lbl>, ERITROCITOS <dbl+lbl>, PH <dbl>,
## #   NITRITOS <dbl+lbl>, PROTEINAS <dbl+lbl>, GLUCOSA <dbl+lbl>,
## #   SANGRE <dbl+lbl>, AK <dbl+lbl>, AM <dbl+lbl>, CB <dbl+lbl>, CF <dbl+lbl>,
## #   CFX <dbl+lbl>, CPF <dbl+lbl>, CL <dbl+lbl>, GENT <dbl+lbl>, NET <dbl+lbl>,
## #   NF <dbl+lbl>, NOF <dbl+lbl>, SXT <dbl+lbl>, CLM <dbl+lbl>, DC <dbl+lbl>,
## #   E <dbl+lbl>, PE <dbl+lbl>, TE <dbl+lbl>, VA <dbl+lbl>,
## #   RESISTENCIA <dbl+lbl>
# Recodificar los valores de la variable1 según un diccionario
db$BACTERIA = as.numeric(db$BACTERIA)

db <- db %>%
  mutate(BACTERIA = recode(BACTERIA, `1` = "Escherichia coli", `2` = "Pseudomona aeruginosa", `3` = "Shigella", `4` ="klebsiella pneumoniae" , `5` = "klebsiella pneumoniae", `6`= "Staphylococcus aureus", `7` = "Staphylococcus epidermidis", `8` ="Enterobacter", `9`= "Yeast", `10`= "Proteus")) 


str(db$BACTERIA)
##  chr [1:197] "Escherichia coli" "klebsiella pneumoniae" "Yeast" ...
library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.2.3
db %>% select(BACTERIA) %>%  tbl_summary() 
Characteristic N = 1971
BACTERIA
    Enterobacter 1 (0.5%)
    Escherichia coli 128 (65%)
    klebsiella pneumoniae 6 (3.0%)
    Proteus 1 (0.5%)
    Pseudomona aeruginosa 6 (3.0%)
    Staphylococcus aureus 22 (11%)
    Staphylococcus epidermidis 28 (14%)
    Yeast 5 (2.5%)
1 n (%)
dbresis <- db %>% select(BACTERIA, AK:VA)

dbresis<-dbresis %>% mutate_at(vars(AK:VA), as.character) 

dbresisten<- dbresis %>%
  mutate_at(vars(AK:VA), ~recode(.,  `1` = "S", `3` = "R", `NA`= "")) 

datos <- dbresisten %>% gather(key= "farmaco", value= "resultado", -BACTERIA)

datos %>% select(BACTERIA, farmaco, resultado
                 ) %>% mutate (farmaco=paste("Farmaco", farmaco)) %>% 
  tbl_strata(strata = farmaco,
    .tbl_fun =
      ~ .x %>%
        tbl_summary(by = resultado, percent = 'row') %>%
        add_n(), 
    .header = "**{strata}**, N = {n}"
  )
## 79 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 54 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 79 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 54 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 54 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 79 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 172 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 54 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 172 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 172 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 54 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 79 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 79 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 79 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 172 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 54 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 172 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
## 172 observations missing `resultado` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `resultado` column before passing to `tbl_summary()`.
Characteristic Farmaco AK, N = 197 Farmaco AM, N = 197 Farmaco CB, N = 197 Farmaco CF, N = 197 Farmaco CFX, N = 197 Farmaco CL, N = 197 Farmaco CLM, N = 197 Farmaco CPF, N = 197 Farmaco DC, N = 197 Farmaco E, N = 197 Farmaco GENT, N = 197 Farmaco NET, N = 197 Farmaco NF, N = 197 Farmaco NOF, N = 197 Farmaco PE, N = 197 Farmaco SXT, N = 197 Farmaco TE, N = 197 Farmaco VA, N = 197
N R, N = 131 S, N = 1051 N R, N = 1301 S, N = 131 N R, N = 791 S, N = 391 N R, N = 771 S, N = 661 N R, N = 381 S, N = 1051 N R, N = 211 S, N = 971 N R, N = 51 S, N = 201 N R, N = 521 S, N = 911 N R, N = 241 S, N = 11 N R, N = 111 S, N = 141 N R, N = 401 S, N = 1031 N R, N = 171 S, N = 1011 N R, N = 121 S, N = 1061 N R, N = 481 S, N = 701 N R, N = 241 S, N = 11 N R, N = 651 S, N = 781 N R, N = 21 S, N = 231 N R, N = 31 S, N = 221
BACTERIA 118 143 118 143 143 118 25 143 25 25 143 118 118 118 25 143 25 25
    Escherichia coli 11 (10%) 95 (90%) 95 (89%) 12 (11%) 71 (67%) 35 (33%) 65 (61%) 42 (39%) 34 (32%) 73 (68%) 17 (16%) 89 (84%) 1 (100%) 0 (0%) 46 (43%) 61 (57%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 33 (31%) 74 (69%) 14 (13%) 92 (87%) 9 (8.5%) 97 (92%) 43 (41%) 63 (59%) 1 (100%) 0 (0%) 52 (49%) 55 (51%) 0 (0%) 1 (100%) 0 (0%) 1 (100%)
    klebsiella pneumoniae 0 (0%) 5 (100%) 5 (100%) 0 (0%) 2 (40%) 3 (60%) 2 (40%) 3 (60%) 1 (20%) 4 (80%) 0 (0%) 5 (100%) 1 (20%) 4 (80%) 0 (0%) 5 (100%) 0 (0%) 5 (100%) 0 (0%) 5 (100%) 2 (40%) 3 (60%) 2 (40%) 3 (60%)
    Proteus 1 (100%) 0 (0%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 0 (0%)
    Pseudomona aeruginosa 1 (20%) 4 (80%) 5 (100%) 0 (0%) 4 (80%) 1 (20%) 5 (100%) 0 (0%) 2 (40%) 3 (60%) 4 (80%) 1 (20%) 2 (40%) 3 (60%) 2 (40%) 3 (60%) 2 (40%) 3 (60%) 3 (60%) 2 (40%) 2 (40%) 3 (60%) 5 (100%) 0 (0%)
    Staphylococcus epidermidis 0 (0%) 1 (100%) 15 (94%) 1 (6.2%) 1 (100%) 0 (0%) 2 (12%) 14 (88%) 1 (6.2%) 15 (94%) 0 (0%) 1 (100%) 1 (6.7%) 14 (93%) 2 (12%) 14 (88%) 14 (93%) 1 (6.7%) 6 (40%) 9 (60%) 3 (19%) 13 (81%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 1 (100%) 0 (0%) 14 (93%) 1 (6.7%) 4 (25%) 12 (75%) 2 (13%) 13 (87%) 0 (0%) 15 (100%)
    Enterobacter 1 (100%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 0 (0%)
    Staphylococcus aureus 8 (100%) 0 (0%) 1 (12%) 7 (88%) 0 (0%) 8 (100%) 2 (25%) 6 (75%) 0 (0%) 8 (100%) 8 (100%) 0 (0%) 3 (38%) 5 (62%) 1 (12%) 7 (88%) 8 (100%) 0 (0%) 1 (12%) 7 (88%) 0 (0%) 8 (100%) 2 (25%) 6 (75%)
1 n (%)
# Continue from your code...
datos <- dbresisten %>% 
    gather(key= "farmaco", value= "resultado", -BACTERIA) %>% 
    mutate(farmaco=paste("Farmaco", farmaco))

# Convert 'resultado' to factor for better handling in ggplot
datos$resultado <- as.factor(datos$resultado)

# Create a table with counts of each combination of 'BACTERIA', 'farmaco', and 'resultado'
heatmap_data <- datos %>% 
    group_by(BACTERIA, farmaco, resultado) %>% 
    summarise(count=n()) %>% 
    ungroup()
## `summarise()` has grouped output by 'BACTERIA', 'farmaco'. You can override
## using the `.groups` argument.
# Generate proportions by dividing count by sum of counts for each 'BACTERIA'-'farmaco' combination
heatmap_data <- heatmap_data %>%
    group_by(BACTERIA, farmaco) %>%
    mutate(proportion = count / sum(count)) %>%
    ungroup()


# Filter out blank values
heatmap_data <- heatmap_data %>% filter(resultado != "")

# Plot the heatmap
ggplot(heatmap_data, aes(x=resultado, y=BACTERIA, fill=proportion)) +
    geom_tile() +
    scale_fill_gradient(low = "white", high = "red") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(x="", y="", fill="Proportion") +
    facet_wrap(~farmaco)+theme_test()+
  theme(legend.position='right',text = element_text(size = 12, face="bold"), axis.text.x = element_text(size=10, colour = "black"),axis.text.y = element_text(size=10, colour = "black"),
        legend.text = element_text(size = 12, color = "black"),strip.text.x = element_text(size = 12,face="bold"))

ggplot(heatmap_data, aes(x=resultado, y=farmaco, fill=proportion)) +
    geom_tile() +
    scale_fill_gradient(low = "white", high = "red") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(x="", y="", fill="Proportion") +
    facet_wrap(~BACTERIA)+theme_test()+
  theme(legend.position='right',text = element_text(size = 12, face="bold"), axis.text.x = element_text(size=10, colour = "black"),axis.text.y = element_text(size=10, colour = "black"),
        legend.text = element_text(size = 12, color = "black"),strip.text.x = element_text(size = 12,face="bold"))

library(ggpubr)
datosplot <- datos %>% drop_na() 

ggplot(datosplot, aes(x = BACTERIA, fill = resultado)) +
  stat_count(geom = "bar",
  position = "fill",stat="identity")+
  facet_wrap(. ~ farmaco)+theme_pubr()+
  coord_flip() + theme(legend.text = element_text(size = 8, color = "black"))
## Warning in stat_count(geom = "bar", position = "fill", stat = "identity"):
## Ignoring unknown parameters: `stat`

 #+ theme(text = element_text(size = 5))+theme(axis.text.x = element_text(angle = 90))+theme(axis.text.x = element_text(hjust = 1, vjust = .5))+ scale_fill_brewer(palette="Pastel1")+ylab("")+xlab("")+theme(panel.spacing.x = unit(1, "lines"))+scale_x_discrete(limits = rev)+ scale_y_continuous(expand=expansion(mult=c(0,0.01)))+
datosplot %>% select(BACTERIA, farmaco, resultado
                 ) %>% mutate (farmaco=paste("Farmaco", farmaco)) %>% 
  tbl_strata(strata = farmaco,
    .tbl_fun =
      ~ .x %>%
        tbl_summary(by = resultado, percent = 'row') %>%
        add_n(), 
    .header = "**{strata}**, N = {n}"
  )
Characteristic Farmaco Farmaco AK, N = 118 Farmaco Farmaco AM, N = 143 Farmaco Farmaco CB, N = 118 Farmaco Farmaco CF, N = 143 Farmaco Farmaco CFX, N = 143 Farmaco Farmaco CL, N = 118 Farmaco Farmaco CLM, N = 25 Farmaco Farmaco CPF, N = 143 Farmaco Farmaco DC, N = 25 Farmaco Farmaco E, N = 25 Farmaco Farmaco GENT, N = 143 Farmaco Farmaco NET, N = 118 Farmaco Farmaco NF, N = 118 Farmaco Farmaco NOF, N = 118 Farmaco Farmaco PE, N = 25 Farmaco Farmaco SXT, N = 143 Farmaco Farmaco TE, N = 25 Farmaco Farmaco VA, N = 25
N R, N = 131 S, N = 1051 N R, N = 1301 S, N = 131 N R, N = 791 S, N = 391 N R, N = 771 S, N = 661 N R, N = 381 S, N = 1051 N R, N = 211 S, N = 971 N R, N = 51 S, N = 201 N R, N = 521 S, N = 911 N R, N = 241 S, N = 11 N R, N = 111 S, N = 141 N R, N = 401 S, N = 1031 N R, N = 171 S, N = 1011 N R, N = 121 S, N = 1061 N R, N = 481 S, N = 701 N R, N = 241 S, N = 11 N R, N = 651 S, N = 781 N R, N = 21 S, N = 231 N R, N = 31 S, N = 221
BACTERIA 118 143 118 143 143 118 25 143 25 25 143 118 118 118 25 143 25 25
    Escherichia coli 11 (10%) 95 (90%) 95 (89%) 12 (11%) 71 (67%) 35 (33%) 65 (61%) 42 (39%) 34 (32%) 73 (68%) 17 (16%) 89 (84%) 1 (100%) 0 (0%) 46 (43%) 61 (57%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 33 (31%) 74 (69%) 14 (13%) 92 (87%) 9 (8.5%) 97 (92%) 43 (41%) 63 (59%) 1 (100%) 0 (0%) 52 (49%) 55 (51%) 0 (0%) 1 (100%) 0 (0%) 1 (100%)
    klebsiella pneumoniae 0 (0%) 5 (100%) 5 (100%) 0 (0%) 2 (40%) 3 (60%) 2 (40%) 3 (60%) 1 (20%) 4 (80%) 0 (0%) 5 (100%) 1 (20%) 4 (80%) 0 (0%) 5 (100%) 0 (0%) 5 (100%) 0 (0%) 5 (100%) 2 (40%) 3 (60%) 2 (40%) 3 (60%)
    Proteus 1 (100%) 0 (0%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 0 (0%)
    Pseudomona aeruginosa 1 (20%) 4 (80%) 5 (100%) 0 (0%) 4 (80%) 1 (20%) 5 (100%) 0 (0%) 2 (40%) 3 (60%) 4 (80%) 1 (20%) 2 (40%) 3 (60%) 2 (40%) 3 (60%) 2 (40%) 3 (60%) 3 (60%) 2 (40%) 2 (40%) 3 (60%) 5 (100%) 0 (0%)
    Staphylococcus epidermidis 0 (0%) 1 (100%) 15 (94%) 1 (6.2%) 1 (100%) 0 (0%) 2 (12%) 14 (88%) 1 (6.2%) 15 (94%) 0 (0%) 1 (100%) 1 (6.7%) 14 (93%) 2 (12%) 14 (88%) 14 (93%) 1 (6.7%) 6 (40%) 9 (60%) 3 (19%) 13 (81%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 1 (100%) 0 (0%) 14 (93%) 1 (6.7%) 4 (25%) 12 (75%) 2 (13%) 13 (87%) 0 (0%) 15 (100%)
    Enterobacter 1 (100%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 0 (0%)
    Staphylococcus aureus 8 (100%) 0 (0%) 1 (12%) 7 (88%) 0 (0%) 8 (100%) 2 (25%) 6 (75%) 0 (0%) 8 (100%) 8 (100%) 0 (0%) 3 (38%) 5 (62%) 1 (12%) 7 (88%) 8 (100%) 0 (0%) 1 (12%) 7 (88%) 0 (0%) 8 (100%) 2 (25%) 6 (75%)
1 n (%)
library(AMR)
## Warning: package 'AMR' was built under R version 4.2.3
dbresistence <- dbresisten


#CLEAN DATA with AMR

#Now we can thus clean our data:

dbresistence$BACTERIA <- as.mo(dbresistence$BACTERIA, info = TRUE)
## ℹ Microorganism translation was uncertain for "Pseudomona aeruginosa"
##   (assumed Pseudomonas aeruginosa). Run mo_uncertainties() to review this,
##   or use add_custom_microorganisms() to add custom entries.
mo_uncertainties()
## Matching scores are based on the resemblance between the input and the full
## taxonomic name, and the pathogenicity in humans. See ?mo_matching_score.
## 
## --------------------------------------------------------------------------------
## "Pseudomona aeruginosa" -> Pseudomonas aeruginosa (B_PSDMN_AERG, 0.977)
## Also matched: Pseudomonas andropogonis (0.708), Pseudomonas
##               antimicrobica (0.680), Pseudomonas alcaligenes (0.674), Pseudomonas
##               acidovorans (0.630), Pseudomonas asiatica (0.500), Pseudomonas
##               azerbaijanoriens (0.500), Pseudomonas azerbaijanorientalis (0.490),
##               Pseudomonas azerbaijanoccidens (0.489), Pseudomonas
##               azerbaijanoccidentalis (0.480), and Pseudomonas argentinensis (0.480)
## 
## Only the first 10 other matches of each record are shown. Run
## print(mo_uncertainties(), n = ...) to view more entries, or save
## mo_uncertainties() to an object.
library(gt)
bact<-dbresistence %>%
  count(mo_name(BACTERIA), sort = TRUE)

bact %>% gt()
mo_name(BACTERIA) n
Escherichia coli 128
Staphylococcus epidermidis 28
Staphylococcus aureus 22
Klebsiella pneumoniae 6
Pseudomonas aeruginosa 6
(levadura desconocida) 5
Enterobacter 1
Proteus 1
#Antibiotic results
# method 1, be explicit about the columns:
#dbresistence <- dbresistence %>% mutate_at(vars(AK:VA), as.sir)

#method 2:
dbresistence <- dbresistence %>%
  mutate_if(is_sir_eligible, as.sir)

summary(dbresistence)
##    BACTERIA                AK                    AM               
##  Class :mo             Class:sir             Class:sir            
##  <NA>  :0              %R   :11.0% (n=13)    %R   :90.9% (n=130)  
##  Unique:8              %SI  :89.0% (n=105)   %SI  : 9.1% (n=13)   
##  #1    :B_ESCHR_COLI   - %S :89.0% (n=105)   - %S : 9.1% (n=13)   
##  #2    :B_STPHY_EPDR   - %I : 0.0% (n=0)     - %I : 0.0% (n=0)    
##  #3    :B_STPHY_AURS                                              
##      CB                   CF                  CFX               
##  Class:sir            Class:sir            Class:sir            
##  %R   :66.9% (n=79)   %R   :53.8% (n=77)   %R   :26.6% (n=38)   
##  %SI  :33.1% (n=39)   %SI  :46.2% (n=66)   %SI  :73.4% (n=105)  
##  - %S :33.1% (n=39)   - %S :46.2% (n=66)   - %S :73.4% (n=105)  
##  - %I : 0.0% (n=0)    - %I : 0.0% (n=0)    - %I : 0.0% (n=0)    
##                                                                 
##     CPF                   CL                  GENT              
##  Class:sir            Class:sir            Class:sir            
##  %R   :36.4% (n=52)   %R   :17.8% (n=21)   %R   :28.0% (n=40)   
##  %SI  :63.6% (n=91)   %SI  :82.2% (n=97)   %SI  :72.0% (n=103)  
##  - %S :63.6% (n=91)   - %S :82.2% (n=97)   - %S :72.0% (n=103)  
##  - %I : 0.0% (n=0)    - %I : 0.0% (n=0)    - %I : 0.0% (n=0)    
##                                                                 
##     NET                    NF                   NOF              
##  Class:sir             Class:sir             Class:sir           
##  %R   :14.4% (n=17)    %R   :10.2% (n=12)    %R   :40.7% (n=48)  
##  %SI  :85.6% (n=101)   %SI  :89.8% (n=106)   %SI  :59.3% (n=70)  
##  - %S :85.6% (n=101)   - %S :89.8% (n=106)   - %S :59.3% (n=70)  
##  - %I : 0.0% (n=0)     - %I : 0.0% (n=0)     - %I : 0.0% (n=0)   
##                                                                  
##     SXT                  CLM                   DC              
##  Class:sir            Class:sir            Class:sir           
##  %R   :45.5% (n=65)   %R   :20.0% (n=5)    %R   :96.0% (n=24)  
##  %SI  :54.5% (n=78)   %SI  :80.0% (n=20)   %SI  : 4.0% (n=1)   
##  - %S :54.5% (n=78)   - %S :80.0% (n=20)   - %S : 4.0% (n=1)   
##  - %I : 0.0% (n=0)    - %I : 0.0% (n=0)    - %I : 0.0% (n=0)   
##                                                                
##      E                    PE                   TE              
##  Class:sir            Class:sir            Class:sir           
##  %R   :44.0% (n=11)   %R   :96.0% (n=24)   %R   : 8.0% (n=2)   
##  %SI  :56.0% (n=14)   %SI  : 4.0% (n=1)    %SI  :92.0% (n=23)  
##  - %S :56.0% (n=14)   - %S : 4.0% (n=1)    - %S :92.0% (n=23)  
##  - %I : 0.0% (n=0)    - %I : 0.0% (n=0)    - %I : 0.0% (n=0)   
##                                                                
##      VA              
##  Class:sir           
##  %R   :12.0% (n=3)   
##  %SI  :88.0% (n=22)  
##  - %S :88.0% (n=22)  
##  - %I : 0.0% (n=0)   
## 
dbresisten<-dbresisten %>%
  mutate_if(is_sir_eligible, as.sir)

    ggplot(dbresisten) +
    geom_sir() +
    scale_y_percent() +
    scale_sir_colours() +
    labels_sir_count() +
    theme_sir()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "CPF".
## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "NF".
## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "NOF".
## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "DC".
## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "E".
## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "PE".
## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "CPF".
## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "NF".
## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "NOF".
## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "DC".
## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "E".
## Warning: in as.ab(): these values could not be coerced to a valid antimicrobial
## ID: "PE".
## Warning: Removed 12 rows containing missing values (`position_stack()`).
## Warning: Removed 18 rows containing missing values (`position_stack()`).

ab<-antibiogram(dbresisten,)


??antibiogram
## starting httpd help server ... done
ab %>% gt()
Patógeno (N min-max) AK AM CB CF CFX CL CPF GENT NET NF NOF SXT
E. coli (1-107) 90 11 33 39 68 84 57 69 87 92 59 51
 dbresisten %>% group_by(mo_name(BACTERIA)) %>%  summarise(amoxicillin = resistance(AM))
## Warning: There were 7 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `amoxicillin = resistance(AM)`.
## ℹ In group 1: `mo_name(BACTERIA) = "(levadura desconocida)"`.
## Caused by warning:
## ! Introducing NA: no results available for AM in group: mo_name(BACTERIA) =
## "(levadura desconocida)" (minimum = 30).
## ℹ Run ]8;;ide:run:dplyr::last_dplyr_warnings()dplyr::last_dplyr_warnings()]8;; to see the 6 remaining warnings.
## # A tibble: 8 × 2
##   `mo_name(BACTERIA)`        amoxicillin
##   <chr>                            <dbl>
## 1 (levadura desconocida)          NA    
## 2 Enterobacter                    NA    
## 3 Escherichia coli                 0.888
## 4 Klebsiella pneumoniae           NA    
## 5 Proteus                         NA    
## 6 Pseudomonas aeruginosa          NA    
## 7 Staphylococcus aureus           NA    
## 8 Staphylococcus epidermidis      NA
# Continue from your code...
datos <- dbresisten %>% 
    gather(key= "farmaco", value= "resultado", -BACTERIA) 

# Convert 'resultado' to factor for better handling in ggplot
datos$resultado <- as.factor(datos$resultado)

# Create a table with counts of each combination of 'BACTERIA', 'farmaco', and 'resultado'
heatmap_data <- datos %>% 
    group_by(BACTERIA, farmaco, resultado) %>% 
    summarise(count=n()) %>% 
    ungroup()
## `summarise()` has grouped output by 'BACTERIA', 'farmaco'. You can override
## using the `.groups` argument.
# Generate proportions by dividing count by sum of counts for each 'BACTERIA'-'farmaco' combination
heatmap_data <- heatmap_data %>%
    group_by(BACTERIA, farmaco) %>%
    mutate(proportion = count / sum(count)) %>%
    ungroup()


# Filter out blank values
heatmap_data <- heatmap_data %>% filter(resultado != "")

# Plot the heatmap
ggplot(heatmap_data, aes(x=resultado, y=BACTERIA, fill=proportion)) +
    geom_tile() +
    scale_fill_gradient(low = "white", high = "red") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(x="", y="", fill="Proportion") +
    facet_wrap(~farmaco)+theme_test()+theme(legend.position='right',text = element_text(size = 12, face="bold"), axis.text.x = element_text(size=10, colour = "black"),axis.text.y = element_text(size=10, colour = "black"),
        legend.text = element_text(size = 12, color = "black"),strip.text.x = element_text(size = 12,face="bold"))

ggplot(heatmap_data, aes(x=resultado, y=farmaco, fill=proportion)) +
    geom_tile() +
    scale_fill_gradient(low = "#C0A635", high = "#033195") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(x="", y="", fill="Proportion") +
    facet_wrap(~BACTERIA)+theme_test()+
  theme(legend.position='right',text = element_text(size = 12, face="bold"), axis.text.x = element_text(size=16, colour = "black"),axis.text.y = element_text(size=10, colour = "black"),
        legend.text = element_text(size = 12, color = "black"),strip.text.x = element_text(size = 12,face="bold"))

library(ggpubr)
datosplot <- datos %>% drop_na() 

ggplot(datosplot, aes(x = BACTERIA, fill = resultado)) + 
  stat_count(geom = "bar",
  position = "fill",stat="identity",color="black")+
  facet_wrap(. ~ farmaco)+theme_classic2()+
  coord_flip() + theme(legend.text = element_text(size = 8, color = "black"))+ scale_fill_manual(values = alpha(c( "#C0A635" ,"#033195"), .75))+
  theme(legend.position='right',text = element_text(size = 12, face="bold"), axis.text.x = element_text(size=10, colour = "black"),axis.text.y = element_text(size=10, colour = "black"),
        legend.text = element_text(size = 12, color = "black"),strip.text.x = element_text(size = 12,face="bold"))
## Warning in stat_count(geom = "bar", position = "fill", stat = "identity", :
## Ignoring unknown parameters: `stat`

 #+ theme(text = element_text(size = 5))+theme(axis.text.x = element_text(angle = 90))+theme(axis.text.x = element_text(hjust = 1, vjust = .5))+ scale_fill_brewer(palette="Pastel1")+ylab("")+xlab("")+theme(panel.spacing.x = unit(1, "lines"))+scale_x_discrete(limits = rev)+ scale_y_continuous(expand=expansion(mult=c(0,0.01)))+
dbresisclin <- db %>% select(EDAD, BACTERIA, AK:VA,)  

dbresisclin<-dbresisclin %>% mutate_at(vars(AK:VA), as.character) 

dbresistencin<- dbresisclin %>%
  mutate_at(vars(AK:VA), ~recode(.,  `1` = "S", `3` = "R", `NA`= ""))


# Gather data to long format
long_df <- dbresistencin %>% 
  gather(key = "farmaco", value = "resultado", -EDAD, -BACTERIA)

# View the resulting long dataframe
head(long_df)
## # A tibble: 6 × 4
##    EDAD BACTERIA              farmaco resultado
##   <dbl> <chr>                 <chr>   <chr>    
## 1    49 Escherichia coli      AK      S        
## 2    34 klebsiella pneumoniae AK      <NA>     
## 3    33 Yeast                 AK      <NA>     
## 4    53 Escherichia coli      AK      S        
## 5    45 Escherichia coli      AK      S        
## 6    33 Staphylococcus aureus AK      <NA>
long_df<-long_df %>% drop_na()
long_df %>% tbl_summary(by=resultado) %>% add_p()
## There was an error in 'add_p()/add_difference()' for variable 'BACTERIA', p-value omitted:
## Error in stats::fisher.test(c("Escherichia coli", "Escherichia coli", : FEXACT error 7(location). LDSTP=18360 is too small for this problem,
##   (pastp=69.7532, ipn_0:=ipoin[itp=564]=8010, stp[ipn_0]=69.4681).
## Increase workspace or consider using 'simulate.p.value=TRUE'
Characteristic R, N = 6341 S, N = 1,0341 p-value2
EDAD DEL PACIENTE 38 (26, 53) 37 (23, 52) 0.029
BACTERIA
    Enterobacter 7 (1.1%) 5 (0.5%)
    Escherichia coli 477 (75%) 783 (76%)
    klebsiella pneumoniae 15 (2.4%) 45 (4.4%)
    Proteus 7 (1.1%) 5 (0.5%)
    Pseudomona aeruginosa 37 (5.8%) 23 (2.2%)
    Staphylococcus aureus 31 (4.9%) 53 (5.1%)
    Staphylococcus epidermidis 60 (9.5%) 120 (12%)
farmaco <0.001
    AK 13 (2.1%) 103 (10.0%)
    AM 126 (20%) 13 (1.3%)
    CB 77 (12%) 39 (3.8%)
    CF 75 (12%) 64 (6.2%)
    CFX 36 (5.7%) 103 (10.0%)
    CL 20 (3.2%) 96 (9.3%)
    CLM 4 (0.6%) 19 (1.8%)
    CPF 50 (7.9%) 89 (8.6%)
    DC 22 (3.5%) 1 (<0.1%)
    E 10 (1.6%) 13 (1.3%)
    GENT 39 (6.2%) 100 (9.7%)
    NET 15 (2.4%) 101 (9.8%)
    NF 12 (1.9%) 104 (10%)
    NOF 46 (7.3%) 70 (6.8%)
    PE 22 (3.5%) 1 (<0.1%)
    SXT 62 (9.8%) 77 (7.4%)
    TE 2 (0.3%) 21 (2.0%)
    VA 3 (0.5%) 20 (1.9%)
1 Median (IQR); n (%)
2 Wilcoxon rank sum test; Pearson's Chi-squared test
# Load necessary libraries
library(dplyr)
library(ggplot2)
library(tidyr)
library(purrr)
library(broom)
## Warning: package 'broom' was built under R version 4.2.3
# Your data preparation process here
dbresisclin <- db %>% select(EDAD, BACTERIA, AK:VA)
dbresisclin <- dbresisclin %>% mutate_at(vars(AK:VA), as.character) 

dbresistencin <- dbresisclin %>%
  mutate_at(vars(AK:VA), ~recode(.,  `1` = "S", `3` = "R", `NA`= ""))

# Gather data to long format
long_df <- dbresistencin %>% 
  gather(key = "farmaco", value = "resultado", -EDAD, -BACTERIA)

# Filter out blank values
long_df <- long_df %>% filter(resultado != "")

# Calculate the count for each bacteria and result
df_counts <- long_df %>%
  group_by(BACTERIA, resultado) %>%
  summarise(n = n(), .groups = "drop")

# Calculate the percentage for each bacteria and result
df_counts <- df_counts %>%
  group_by(BACTERIA) %>%
  mutate(percent = n / sum(n) * 100)

# Run chi square test for each bacteria and add p-value to data frame
df_counts <- df_counts %>%
  nest(data = -BACTERIA) %>%
  mutate(chi_test = map(data, ~ chisq.test(.x$n)$p.value)) %>%
  unnest(data, chi_test)
## Warning: `unnest()` has a new interface. See `?unnest` for details.
## ℹ Try `df %>% unnest(c(data, chi_test))`, with `mutate()` if needed.
# Add significance labels
df_counts <- df_counts %>%
  mutate(signif_label = case_when(
    chi_test > 0.05 ~ "NS",
    chi_test <= 0.001 ~ "<0.001",
    chi_test >= 0.004 ~ "0.004",
    TRUE ~ as.character(chi_test)
  ))

# Create the bar plot
ggplot(df_counts, aes(x = resultado, y = percent, fill = resultado)) +
  geom_bar(stat = "identity") +
  facet_wrap(~BACTERIA, scales = "free_y") +
  geom_text(aes(label = paste0("p = ", signif_label)), vjust = -1) +
  labs(title = "Bacteria Resistance and Sensitivity", 
       x = "Result", 
       y = "Percentage", 
       fill = "")+theme_sir()

# Load necessary libraries
library(dplyr)
library(ggplot2)
library(tidyr)

# Your data preparation process here
dbresisclin <- db %>% select(EDAD, BACTERIA, AK:VA)
dbresisclin <- dbresisclin %>% mutate_at(vars(AK:VA), as.character) 

dbresistencin <- dbresisclin %>%
  mutate_at(vars(AK:VA), ~recode(.,  `1` = "S", `3` = "R", `NA`= ""))

# Gather data to long format
long_df <- dbresistencin %>% 
  gather(key = "farmaco", value = "resultado", -EDAD, -BACTERIA)

# Filter out blank values and select only 'R' results
long_df <- long_df %>% filter(resultado == "R")

# Calculate the count for each bacteria and result
df_counts <- long_df %>%
  group_by(BACTERIA, farmaco) %>%
  summarise(n = n(), .groups = "drop") 

# Calculate the total count for each bacteria
df_totals <- long_df %>%
  group_by(BACTERIA) %>%
  summarise(total = n(), .groups = "drop")

# Join the data frames and calculate proportions and confidence intervals
df_final <- inner_join(df_counts, df_totals, by = "BACTERIA") %>%
  mutate(proportion = n / total,
         conf_low = proportion - 1.96 * sqrt((proportion * (1 - proportion)) / total),
         conf_high = proportion + 1.96 * sqrt((proportion * (1 - proportion)) / total))


# Create the bar plot
ggplot(df_final, aes(x = farmaco, y = proportion, fill = BACTERIA)) +
  geom_bar(stat = "identity", position = position_dodge())  +
  labs(title = "Bacteria Resistance Frequency", 
       x = "Drug", 
       y = "Resistance Frequency", 
       fill = "Bacteria")

#filter enterobacter
filtentero <- df_final %>% filter(BACTERIA == "Enterobacter")
enterores<-ggplot(filtentero, aes(x = farmaco, y = proportion, fill = BACTERIA))+
  geom_errorbar(aes(ymin = conf_low, ymax = conf_high), width = 0.2, position = position_dodge(0.9))+
  geom_bar(stat = "identity", position = position_dodge())  +
  labs(title = "", 
       x = "Drug", 
       y = "Resistance Frequency", 
       fill = "")
enterores

#filter e colI
filtecoli<-df_final %>% filter(BACTERIA == "Escherichia coli")
ecolires<-ggplot(filtecoli, aes(x = farmaco, y = proportion, fill = BACTERIA))+
  geom_errorbar(aes(ymin = conf_low, ymax = conf_high), width = 0.2, position = position_dodge(0.9))+
  geom_bar(stat = "identity", position = position_dodge())  +
  labs(title = "", 
       x = "Drug", 
       y = "Resistance Frequency", 
       fill = "")
ecolires

#Pseudomona aeruginosa
filtepseudo<-df_final %>% filter(BACTERIA == "Pseudomona aeruginosa")
pseudores<-ggplot(filtepseudo, aes(x = farmaco, y = proportion, fill = BACTERIA))+
  geom_errorbar(aes(ymin = conf_low, ymax = conf_high), width = 0.2, position = position_dodge(0.9))+
  geom_bar(stat = "identity", position = position_dodge())  +
  labs(title = "", 
       x = "Drug", 
       y = "Resistance Frequency", 
       fill = "")
pseudores

# Load AMR package
library(AMR)
# Calculate multidrug resistance
mdr_results <- dbresistencin %>%
  select(BACTERIA, AK:VA) %>%
  mutate(across(AK:VA, as.sir)) %>%
  mutate(mdr = mdro(.))
##  (54 isolates had no test results)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `mdr = mdro(.)`.
## Caused by warning:
## ! in mdro(): NA introduced for isolates where the available percentage of
## antimicrobial classes was below 50% (set with pct_required_classes)
# View the results
head(mdr_results)
## # A tibble: 6 × 20
##   BACTERIA     AK    AM    CB    CF    CFX   CPF   CL    GENT  NET   NF    NOF  
##   <chr>        <sir> <sir> <sir> <sir> <sir> <sir> <sir> <sir> <sir> <sir> <sir>
## 1 Escherichia… S     R     S     R     S     R     S     R     S     R     S    
## 2 klebsiella … NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## 3 Yeast        NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## 4 Escherichia… S     R     R     R     S     R     R     S     S     S     S    
## 5 Escherichia… S     R     R     R     S     S     S     S     R     S     R    
## 6 Staphylococ… NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## # ℹ 8 more variables: SXT <sir>, CLM <sir>, DC <sir>, E <sir>, PE <sir>,
## #   TE <sir>, VA <sir>, mdr <ord>
# Chi-squared test
# Fisher's Exact Test
fisher_test <- fisher.test(table(mdr_results$BACTERIA, mdr_results$mdr))
print(fisher_test)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mdr_results$BACTERIA, mdr_results$mdr)
## p-value = 9.115e-15
## alternative hypothesis: two.sided
fisher_test$p.value
## [1] 9.114706e-15
# Bar plot of multidrug resistance
ggplot(mdr_results, aes(x = BACTERIA, fill = mdr, na.rm = TRUE)) +
  geom_bar(position = "fill") +
  labs(x = "Bacteria", y = "Proportion", fill = "MDR Status") +
  scale_fill_discrete(labels = c("Non-MDR", "MDR")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Load necessary libraries
library(dplyr)

# Define start and end dates
start_date <- as.Date("2020-01-01")
end_date <- as.Date("2023-12-31")

# Add date column
dbresistencin <- dbresistencin %>%
  mutate(FECHA = sample(seq(start_date, end_date, by = "day"), size = nrow(dbresistencin), replace = TRUE))

# Load necessary libraries
library(ggplot2)
library(lubridate)

# Convert date to numeric
dbresistencin <- dbresistencin %>%
  mutate(FECHA = as.numeric(FECHA))

# Remove rows with NA in columns AK to VA
dbresistencin <- dbresistencin %>% drop_na(AK:VA)


# Fit linear regression model
#fit <- lm(AK ~ FECHA, data = dbresistencin, na.rm = TRUE)

# Make predictions
#dbresistencin$pred <- predict(fit, newdata = dbresistencin)

# Plot actual vs predicted
#ggplot(dbresistencin, aes(x = as.Date(FECHA), y = AK)) +
 # geom_point() +
  #geom_line(aes(y = pred), color = "red")