This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
#PULL IN DATA
# Load required libraries
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(purrr)
## Warning: package 'purrr' was built under R version 4.4.3
# List of Excel files
files <- c("C:/Users/xdf2/Desktop/2025Q1Dorgsheets.xlsx", "C:/Users/xdf2/Desktop/2025Q1Vorgsheets.xlsx", "C:/Users/xdf2/Desktop/2025Q1Torgsheets.xlsx", "C:/Users/xdf2/Desktop/2025Q1Korgsheets.xlsx", "C:/Users/xdf2/Desktop/2024Q4Dorgsheets.xlsx", "C:/Users/xdf2/Desktop/2024Q4Vorgsheets.xlsx", "C:/Users/xdf2/Desktop/2024Q4Torgsheets.xlsx", "C:/Users/xdf2/Desktop/2024Q4Korgsheets.xlsx", "C:/Users/xdf2/Desktop/2024Q3Dorgsheets.xlsx", "C:/Users/xdf2/Desktop/2024Q3Vorgsheets.xlsx", "C:/Users/xdf2/Desktop/2024Q3Torgsheets.xlsx", "C:/Users/xdf2/Desktop/2024Q3Korgsheets.xlsx")
# Function to read all sheets from a file as character columns
read_all_sheets <- function(file) {
sheet_names <- excel_sheets(file)
map_df(sheet_names, ~ read_excel(file, sheet = .x, col_types = "text") %>%
mutate(SourceFile = basename(file), Sheet = .x))
}
# Read and combine all sheets from all files
combined_data <- map_df(files, read_all_sheets)
## New names:
## • `%` -> `%...6`
## • `%` -> `%...8`
## • `%` -> `%...10`
## • `%` -> `%...12`
## • `%` -> `%...14`
## • `%` -> `%...16`
## • `%` -> `%...18`
## • `%` -> `%...20`
## • `%` -> `%...22`
## • `%` -> `%...24`
## • `%` -> `%...26`
## • `%` -> `%...28`
## • `%` -> `%...30`
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## • `%` -> `%...6`
## • `%` -> `%...8`
## • `%` -> `%...10`
## • `%` -> `%...12`
## • `%` -> `%...14`
## • `%` -> `%...16`
## • `%` -> `%...18`
## • `%` -> `%...20`
## • `%` -> `%...22`
## • `%` -> `%...24`
## • `%` -> `%...26`
## • `%` -> `%...28`
## • `%` -> `%...30`
# View structure and preview
str(combined_data)
## tibble [816 × 32] (S3: tbl_df/tbl/data.frame)
## $ Date : chr [1:816] "2025Q1" "2025Q1" "2025Q1" "2025Q1" ...
## $ Hospital : chr [1:816] "D" "D" "D" "D" ...
## $ Unit : chr [1:816] "Surgery" "Surgery" "Surgery" "Surgery" ...
## $ Organisms : chr [1:816] "GRAM NEGATIVE" "Klebsiella pneumoniae" "Other Klebsiella" "E. coli" ...
## $ All Wounds Total: chr [1:816] "37" "5" "0" "2" ...
## $ %...6 : chr [1:816] "14.015151515151516" "1.893939393939394" "0" "0.75757575757575757" ...
## $ WWTotal : chr [1:816] "31" "2" "0" "1" ...
## $ %...8 : chr [1:816] "12.449799196787151" "0.80321285140562237" "0" "0.40160642570281119" ...
## $ WWlt48 : chr [1:816] "31" "2" NA "1" ...
## $ %...10 : chr [1:816] "12.449799196787151" "0.80321285140562237" "0" "0.40160642570281119" ...
## $ WWgte48 : chr [1:816] "0" NA NA NA ...
## $ %...12 : chr [1:816] "0" NA NA NA ...
## $ NWTotal : chr [1:816] "6" "3" "0" "1" ...
## $ %...14 : chr [1:816] "40" "20" "0" "6.666666666666667" ...
## $ NWlt48 : chr [1:816] "3" "1" NA NA ...
## $ %...16 : chr [1:816] "33.333333333333329" "11.111111111111111" "0" "0" ...
## $ NWgte48 : chr [1:816] "3" "2" NA "1" ...
## $ %...18 : chr [1:816] "49.999999999999993" "33.333333333333329" "0" "16.666666666666664" ...
## $ BloodTotal : chr [1:816] "0" "0" "0" "0" ...
## $ %...20 : chr [1:816] "0" "0" "0" "0" ...
## $ Bloodlt48 : chr [1:816] "0" NA NA NA ...
## $ %...22 : chr [1:816] "0" "0" "0" "0" ...
## $ Bloodgte48 : chr [1:816] "0" NA NA NA ...
## $ %...24 : chr [1:816] "0" NA NA NA ...
## $ CSFTotal : chr [1:816] "0" "0" "0" "0" ...
## $ %...26 : chr [1:816] "0" NA NA NA ...
## $ CSFlt48 : chr [1:816] "0" NA NA NA ...
## $ %...28 : chr [1:816] "0" NA NA NA ...
## $ CSFgte48 : chr [1:816] "0" NA NA NA ...
## $ %...30 : chr [1:816] "0" NA NA NA ...
## $ SourceFile : chr [1:816] "2025Q1Dorgsheets.xlsx" "2025Q1Dorgsheets.xlsx" "2025Q1Dorgsheets.xlsx" "2025Q1Dorgsheets.xlsx" ...
## $ Sheet : chr [1:816] "Surgery" "Surgery" "Surgery" "Surgery" ...
head(combined_data)
## # A tibble: 6 × 32
## Date Hospital Unit Organisms `All Wounds Total` `%...6` WWTotal `%...8`
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2025Q1 D Surgery GRAM NEGAT… 37 14.015… 31 12.449…
## 2 2025Q1 D Surgery Klebsiella… 5 1.8939… 2 0.8032…
## 3 2025Q1 D Surgery Other Kleb… 0 0 0 0
## 4 2025Q1 D Surgery E. coli 2 0.7575… 1 0.4016…
## 5 2025Q1 D Surgery Other Ente… 12 4.5454… 10 4.0160…
## 6 2025Q1 D Surgery Acinetobac… 10 3.7878… 10 4.0160…
## # ℹ 24 more variables: WWlt48 <chr>, `%...10` <chr>, WWgte48 <chr>,
## # `%...12` <chr>, NWTotal <chr>, `%...14` <chr>, NWlt48 <chr>,
## # `%...16` <chr>, NWgte48 <chr>, `%...18` <chr>, BloodTotal <chr>,
## # `%...20` <chr>, Bloodlt48 <chr>, `%...22` <chr>, Bloodgte48 <chr>,
## # `%...24` <chr>, CSFTotal <chr>, `%...26` <chr>, CSFlt48 <chr>,
## # `%...28` <chr>, CSFgte48 <chr>, `%...30` <chr>, SourceFile <chr>,
## # Sheet <chr>
#SUMMARIZE BY PATHOGEN WITHOUT TOTALS
# Organisms of interest
organisms_of_interest <- c("Klebsiella pneumoniae", "Other Klebsiella", "E. coli",
"Other Enterobacterales", "Acinetobacter spp",
"Pseudomonas aeruginosa", "Other gram negative",
"Staphylococcus aureus", "Enterococcus faecalis", "Enterococcus faecium",
"Other gram positive")
# Specimen columns to sum
columns_to_sum <- c(
"All Wounds Total",
"BloodTotal", "CSFTotal")
summary_by_org <- combined_data %>%
filter(Organisms %in% organisms_of_interest) %>%
group_by(Organism = Organisms, Date, Hospital, Unit) %>%
summarise(
total_count = rowSums(across(all_of(columns_to_sum), ~ as.numeric(.)), na.rm = TRUE) %>% sum(),
.groups = "drop"
)
library(scales)
## Warning: package 'scales' was built under R version 4.4.3
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
cb_palette_extended <- hue_pal()(length(unique(summary_by_org$Unit)))
# or use `viridis` for perceptual uniformity
library(viridis)
## Warning: package 'viridis' was built under R version 4.4.3
## Loading required package: viridisLite
## Warning: package 'viridisLite' was built under R version 4.4.3
##
## Attaching package: 'viridis'
##
## The following object is masked from 'package:scales':
##
## viridis_pal
cb_palette_extended <- viridis(length(unique(summary_by_org$Unit)), option = "C") # Originally had “D”, but try "C" or "E" for better contrast
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
hospitals <- c("D", "V", "T", "K")
for (h in hospitals) {
plot <- summary_by_org %>%
filter(Hospital == h) %>%
ggplot(aes(x = Date, y = total_count, fill = Unit)) +
geom_col() +
facet_wrap(~ Organism, scales = "free_y") +
scale_fill_grey(start = 0.3, end = 0.8) +
theme_bw() +
labs(
title = paste("Organism Counts at Hospital", h),
x = "Date", y = "Total Count", fill = "Unit"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(plot) # shows plot in RStudio
}
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ readr 2.1.5 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
gram_negative <- c(
"Klebsiella pneumoniae", "Klebsiella", "E. coli",
"Other Enterobacterales", "Acinetobacter spp",
"Pseudomonas aeruginosa", "Other gram negative"
)
gram_positive <- c(
"Staphylococcus aureus", "Enterococcus faecalis", "Enterococcus faecium",
"Other gram positive"
)
combined_data <- combined_data %>%
mutate(
BacteriaType = case_when(
Organisms %in% gram_negative ~ "Gram Negative",
Organisms %in% gram_positive ~ "Gram Positive",
Organisms == "TotalCultured" ~ "TotalCultured",
TRUE ~ "Other"
)
)
combined_data <- combined_data %>%
mutate(
BloodTotal = as.numeric(BloodTotal),
CSFTotal = as.numeric(CSFTotal),
WWTotal = as.numeric(WWTotal),
NWTotal = as.numeric(NWTotal)
)
view(combined_data)
df_summary <- combined_data %>%
filter(BacteriaType %in% c("Gram Negative", "Gram Positive", "TotalCultured")) %>%
group_by(Date, Hospital, Unit, BacteriaType) %>%
summarise(
Total = sum(BloodTotal, na.rm = TRUE) + sum(CSFTotal, na.rm = TRUE) + sum(WWTotal, na.rm = TRUE) + sum(NWTotal, na.rm = TRUE),
.groups = "drop"
)
# 1. Pivot wider so each BacteriaType becomes a column
df_wide <- df_summary %>%
tidyr::pivot_wider(
names_from = BacteriaType,
values_from = Total,
values_fill = 0
)
# 2. Compute No Result
no_result <- df_wide %>%
mutate(
`No Result` = TotalCultured - `Gram Negative` - `Gram Positive`
) %>%
select(Date, Hospital, Unit, `No Result`)
# 3. Pivot longer to match df_summary format
no_result_long <- no_result %>%
tidyr::pivot_longer(
cols = `No Result`,
names_to = "BacteriaType",
values_to = "Total"
)
df_plot <- df_summary %>%
filter(BacteriaType != "TotalCultured") %>%
bind_rows(no_result_long)
df_plot$BacteriaType <- factor(
df_plot$BacteriaType,
levels = c("Gram Negative", "Gram Positive", "No Result")
)
# Extract TotalCultured for each group
cultured_totals <- df_summary %>%
filter(BacteriaType == "TotalCultured") %>%
select(Date, Hospital, Unit, TotalCultured = Total)
# Join to df_plot
df_plot_pct <- df_plot %>%
left_join(cultured_totals, by = c("Date", "Hospital", "Unit")) %>%
mutate(Percent = round((Total / TotalCultured) * 100, 1))
#plot by hospital GN, GP, and no result as a % of total
for (h in hospitals) {
plot <- df_plot_pct %>%
filter(Hospital == h) %>%
ggplot(aes(x = Date, y = Total, fill = BacteriaType)) +
geom_col(position = "stack") +
geom_text(aes(label = paste0(Percent, "%")),
position = position_stack(vjust = 0.5),
size = 3, color = "black") +
facet_wrap(Hospital ~ Unit, scales = "free") +
scale_fill_grey(start = 0.3, end = 0.8) +
theme_bw() +
labs(title = "Bacteria Type Distribution (as % of Total Cultured)", x = "Date", y = "Total")
print(plot) # shows plot in RStudio
}