R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

#PULL IN DATA
# Load required libraries
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(purrr)
## Warning: package 'purrr' was built under R version 4.4.3
# List of Excel files
files <- c("C:/Users/xdf2/Desktop/2025Q1Dorgsheets.xlsx", "C:/Users/xdf2/Desktop/2025Q1Vorgsheets.xlsx", "C:/Users/xdf2/Desktop/2025Q1Torgsheets.xlsx", "C:/Users/xdf2/Desktop/2025Q1Korgsheets.xlsx", "C:/Users/xdf2/Desktop/2024Q4Dorgsheets.xlsx", "C:/Users/xdf2/Desktop/2024Q4Vorgsheets.xlsx", "C:/Users/xdf2/Desktop/2024Q4Torgsheets.xlsx", "C:/Users/xdf2/Desktop/2024Q4Korgsheets.xlsx", "C:/Users/xdf2/Desktop/2024Q3Dorgsheets.xlsx", "C:/Users/xdf2/Desktop/2024Q3Vorgsheets.xlsx", "C:/Users/xdf2/Desktop/2024Q3Torgsheets.xlsx", "C:/Users/xdf2/Desktop/2024Q3Korgsheets.xlsx")

# Function to read all sheets from a file as character columns
read_all_sheets <- function(file) {
  sheet_names <- excel_sheets(file)
  map_df(sheet_names, ~ read_excel(file, sheet = .x, col_types = "text") %>%
           mutate(SourceFile = basename(file), Sheet = .x))
}

# Read and combine all sheets from all files
combined_data <- map_df(files, read_all_sheets)
## New names:
## • `%` -> `%...6`
## • `%` -> `%...8`
## • `%` -> `%...10`
## • `%` -> `%...12`
## • `%` -> `%...14`
## • `%` -> `%...16`
## • `%` -> `%...18`
## • `%` -> `%...20`
## • `%` -> `%...22`
## • `%` -> `%...24`
## • `%` -> `%...26`
## • `%` -> `%...28`
## • `%` -> `%...30`
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## • `%` -> `%...6`
## • `%` -> `%...8`
## • `%` -> `%...10`
## • `%` -> `%...12`
## • `%` -> `%...14`
## • `%` -> `%...16`
## • `%` -> `%...18`
## • `%` -> `%...20`
## • `%` -> `%...22`
## • `%` -> `%...24`
## • `%` -> `%...26`
## • `%` -> `%...28`
## • `%` -> `%...30`
# View structure and preview
str(combined_data)
## tibble [816 × 32] (S3: tbl_df/tbl/data.frame)
##  $ Date            : chr [1:816] "2025Q1" "2025Q1" "2025Q1" "2025Q1" ...
##  $ Hospital        : chr [1:816] "D" "D" "D" "D" ...
##  $ Unit            : chr [1:816] "Surgery" "Surgery" "Surgery" "Surgery" ...
##  $ Organisms       : chr [1:816] "GRAM NEGATIVE" "Klebsiella pneumoniae" "Other Klebsiella" "E. coli" ...
##  $ All Wounds Total: chr [1:816] "37" "5" "0" "2" ...
##  $ %...6           : chr [1:816] "14.015151515151516" "1.893939393939394" "0" "0.75757575757575757" ...
##  $ WWTotal         : chr [1:816] "31" "2" "0" "1" ...
##  $ %...8           : chr [1:816] "12.449799196787151" "0.80321285140562237" "0" "0.40160642570281119" ...
##  $ WWlt48          : chr [1:816] "31" "2" NA "1" ...
##  $ %...10          : chr [1:816] "12.449799196787151" "0.80321285140562237" "0" "0.40160642570281119" ...
##  $ WWgte48         : chr [1:816] "0" NA NA NA ...
##  $ %...12          : chr [1:816] "0" NA NA NA ...
##  $ NWTotal         : chr [1:816] "6" "3" "0" "1" ...
##  $ %...14          : chr [1:816] "40" "20" "0" "6.666666666666667" ...
##  $ NWlt48          : chr [1:816] "3" "1" NA NA ...
##  $ %...16          : chr [1:816] "33.333333333333329" "11.111111111111111" "0" "0" ...
##  $ NWgte48         : chr [1:816] "3" "2" NA "1" ...
##  $ %...18          : chr [1:816] "49.999999999999993" "33.333333333333329" "0" "16.666666666666664" ...
##  $ BloodTotal      : chr [1:816] "0" "0" "0" "0" ...
##  $ %...20          : chr [1:816] "0" "0" "0" "0" ...
##  $ Bloodlt48       : chr [1:816] "0" NA NA NA ...
##  $ %...22          : chr [1:816] "0" "0" "0" "0" ...
##  $ Bloodgte48      : chr [1:816] "0" NA NA NA ...
##  $ %...24          : chr [1:816] "0" NA NA NA ...
##  $ CSFTotal        : chr [1:816] "0" "0" "0" "0" ...
##  $ %...26          : chr [1:816] "0" NA NA NA ...
##  $ CSFlt48         : chr [1:816] "0" NA NA NA ...
##  $ %...28          : chr [1:816] "0" NA NA NA ...
##  $ CSFgte48        : chr [1:816] "0" NA NA NA ...
##  $ %...30          : chr [1:816] "0" NA NA NA ...
##  $ SourceFile      : chr [1:816] "2025Q1Dorgsheets.xlsx" "2025Q1Dorgsheets.xlsx" "2025Q1Dorgsheets.xlsx" "2025Q1Dorgsheets.xlsx" ...
##  $ Sheet           : chr [1:816] "Surgery" "Surgery" "Surgery" "Surgery" ...
head(combined_data)
## # A tibble: 6 × 32
##   Date   Hospital Unit    Organisms   `All Wounds Total` `%...6` WWTotal `%...8`
##   <chr>  <chr>    <chr>   <chr>       <chr>              <chr>   <chr>   <chr>  
## 1 2025Q1 D        Surgery GRAM NEGAT… 37                 14.015… 31      12.449…
## 2 2025Q1 D        Surgery Klebsiella… 5                  1.8939… 2       0.8032…
## 3 2025Q1 D        Surgery Other Kleb… 0                  0       0       0      
## 4 2025Q1 D        Surgery E. coli     2                  0.7575… 1       0.4016…
## 5 2025Q1 D        Surgery Other Ente… 12                 4.5454… 10      4.0160…
## 6 2025Q1 D        Surgery Acinetobac… 10                 3.7878… 10      4.0160…
## # ℹ 24 more variables: WWlt48 <chr>, `%...10` <chr>, WWgte48 <chr>,
## #   `%...12` <chr>, NWTotal <chr>, `%...14` <chr>, NWlt48 <chr>,
## #   `%...16` <chr>, NWgte48 <chr>, `%...18` <chr>, BloodTotal <chr>,
## #   `%...20` <chr>, Bloodlt48 <chr>, `%...22` <chr>, Bloodgte48 <chr>,
## #   `%...24` <chr>, CSFTotal <chr>, `%...26` <chr>, CSFlt48 <chr>,
## #   `%...28` <chr>, CSFgte48 <chr>, `%...30` <chr>, SourceFile <chr>,
## #   Sheet <chr>
#SUMMARIZE BY PATHOGEN WITHOUT TOTALS
# Organisms of interest
organisms_of_interest <- c("Klebsiella pneumoniae", "Other Klebsiella", "E. coli",
  "Other Enterobacterales", "Acinetobacter spp",
  "Pseudomonas aeruginosa", "Other gram negative",
  "Staphylococcus aureus", "Enterococcus faecalis", "Enterococcus faecium",
  "Other gram positive")

# Specimen columns to sum
columns_to_sum <- c(
  "All Wounds Total",
  "BloodTotal", "CSFTotal")

summary_by_org <- combined_data %>%
  filter(Organisms %in% organisms_of_interest) %>%
  group_by(Organism = Organisms, Date, Hospital, Unit) %>%
  summarise(
    total_count = rowSums(across(all_of(columns_to_sum), ~ as.numeric(.)), na.rm = TRUE) %>% sum(),
    .groups = "drop"
  )


library(scales)
## Warning: package 'scales' was built under R version 4.4.3
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
cb_palette_extended <- hue_pal()(length(unique(summary_by_org$Unit)))

# or use `viridis` for perceptual uniformity
library(viridis)
## Warning: package 'viridis' was built under R version 4.4.3
## Loading required package: viridisLite
## Warning: package 'viridisLite' was built under R version 4.4.3
## 
## Attaching package: 'viridis'
## 
## The following object is masked from 'package:scales':
## 
##     viridis_pal
cb_palette_extended <- viridis(length(unique(summary_by_org$Unit)), option = "C") # Originally had “D”, but try "C" or "E" for better contrast

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
hospitals <- c("D", "V", "T", "K")

for (h in hospitals) {
  plot <- summary_by_org %>%
    filter(Hospital == h) %>%
    ggplot(aes(x = Date, y = total_count, fill = Unit)) +
    geom_col() +
    facet_wrap(~ Organism, scales = "free_y") +
    scale_fill_grey(start = 0.3, end = 0.8) +
theme_bw() +
    labs(
      title = paste("Organism Counts at Hospital", h),
      x = "Date", y = "Total Count", fill = "Unit"
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

  print(plot)  # shows plot in RStudio

}

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ readr     2.1.5     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ scales::discard()   masks purrr::discard()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
gram_negative <- c(
  "Klebsiella pneumoniae", "Klebsiella", "E. coli",
  "Other Enterobacterales", "Acinetobacter spp",
  "Pseudomonas aeruginosa", "Other gram negative"
)

gram_positive <- c(
  "Staphylococcus aureus", "Enterococcus faecalis", "Enterococcus faecium",
  "Other gram positive"
)

combined_data <- combined_data %>%
  mutate(
    BacteriaType = case_when(
      Organisms %in% gram_negative ~ "Gram Negative",
      Organisms %in% gram_positive ~ "Gram Positive",
      Organisms == "TotalCultured" ~ "TotalCultured",
      TRUE ~ "Other"
    )
  )

combined_data <- combined_data %>%
  mutate(
    BloodTotal = as.numeric(BloodTotal),
    CSFTotal = as.numeric(CSFTotal),
WWTotal = as.numeric(WWTotal),
NWTotal = as.numeric(NWTotal)
  )

view(combined_data)

df_summary <- combined_data %>%
  filter(BacteriaType %in% c("Gram Negative", "Gram Positive", "TotalCultured")) %>%
  group_by(Date, Hospital, Unit, BacteriaType) %>%
  summarise(
    Total = sum(BloodTotal, na.rm = TRUE) + sum(CSFTotal, na.rm = TRUE) + sum(WWTotal, na.rm = TRUE) + sum(NWTotal, na.rm = TRUE),
    .groups = "drop"
  )

# 1. Pivot wider so each BacteriaType becomes a column
df_wide <- df_summary %>%
  tidyr::pivot_wider(
    names_from = BacteriaType,
    values_from = Total,
    values_fill = 0
  )

# 2. Compute No Result
no_result <- df_wide %>%
  mutate(
    `No Result` = TotalCultured - `Gram Negative` - `Gram Positive`
  ) %>%
  select(Date, Hospital, Unit, `No Result`)

# 3. Pivot longer to match df_summary format
no_result_long <- no_result %>%
  tidyr::pivot_longer(
    cols = `No Result`,
    names_to = "BacteriaType",
    values_to = "Total"
  )

df_plot <- df_summary %>%
  filter(BacteriaType != "TotalCultured") %>%
  bind_rows(no_result_long)

df_plot$BacteriaType <- factor(
  df_plot$BacteriaType,
  levels = c("Gram Negative", "Gram Positive", "No Result")
)

# Extract TotalCultured for each group
cultured_totals <- df_summary %>%
  filter(BacteriaType == "TotalCultured") %>%
  select(Date, Hospital, Unit, TotalCultured = Total)

# Join to df_plot
df_plot_pct <- df_plot %>%
  left_join(cultured_totals, by = c("Date", "Hospital", "Unit")) %>%
  mutate(Percent = round((Total / TotalCultured) * 100, 1))

#plot by hospital GN, GP, and no result as a % of total
for (h in hospitals) {
plot <- df_plot_pct %>% 
filter(Hospital == h) %>% 
ggplot(aes(x = Date, y = Total, fill = BacteriaType)) +
geom_col(position = "stack") +
  geom_text(aes(label = paste0(Percent, "%")),
            position = position_stack(vjust = 0.5),
            size = 3, color = "black") +
facet_wrap(Hospital ~ Unit, scales = "free") +
scale_fill_grey(start = 0.3, end = 0.8) +
theme_bw() +
labs(title = "Bacteria Type Distribution (as % of Total Cultured)", x = "Date", y = "Total") 

print(plot)  # shows plot in RStudio

}