The packages

> library(lubridate) # ymd()
> library(dplyr) # %>%, select(), mutate(), recode()

Tuning parameters

Defining age classes:

> breaks <- c(-1, 10, 20, 40, 60, 100) # cut() bounds for Age: (-1,10], (10,20], (20,40], (40,60], (60,100]

Defining groups of bacteria:

> others <- c("Escherichia coli", "Streptococcus suis", "Cryptococcus neoformans",
+             "Staphylococcus aureus", "Klebsiella pneumoniae", "Penicillium marneffei",
+             "Streptococcus pneumoniae", "Salmonella sp.", "Pseudomonas pseudomallei",
+             "Pseudomonas aeruginosa", "Acinetobacter baumannii")
+             # NOTE(review): "Pseudomonas pseudomallei" used to appear twice in this
+             # list; the duplicate was dropped -- confirm no other species was meant.
> others <- setNames(rep("others", length(others)), others) # lookup: species name -> "others"

Defining groups of specimens:

> Other <- c(BLOOD = "Other", CSF = "Other") # lookup: specimen origin -> "Other"

Defining the threshold (in hours between admission and detection) to separate HAI from CAI:

> hai_cai_thresh <- 48 # HAI is TRUE when adm.growth (24 * day difference + time offset) exceeds this

Preparing the data

> maldi <- read.csv("malditof.csv", stringsAsFactors = FALSE) %>%
+   select(Sex, Age, AdmissionDate, AdmissionTime, ward, Specimen,
+          Specimen.Origin, InfectionReason, GDetectedDate, GDetectedTime, BacName) %>%
+   mutate(# Delay between admission and detection, as a plain number of hours.
+          # A bare `24 * (date - date)` is a difftime still labelled "days",
+          # so the conversion to hours is made explicit here.
+          adm.growth = as.numeric(ymd(GDetectedDate) - ymd(AdmissionDate),
+                                  units = "hours") +
+                       # time-of-day offset; when either time is missing the
+                       # offset is taken as 0 and only the dates are used
+                       ifelse(is.na(GDetectedTime - AdmissionTime),
+                              0,
+                              GDetectedTime - AdmissionTime),
+          # age classes are the right-closed intervals defined by `breaks`
+          AgeCat     = cut(Age, breaks, right = TRUE),
+          # fold the serotype-specific label into the species name
+          BacName    = recode(BacName,
+                              "Streptococcus suis type 2" = "Streptococcus suis"),
+          # species named in `others` become "others"; the rest are kept as is
+          BacGroup   = recode(BacName, !!!others),
+          # NOTE(review): this overwrites the Specimen column read from the file
+          # with a recoded copy of Specimen.Origin -- confirm that is intended
+          Specimen   = recode(Specimen.Origin, !!!Other),
+          # TRUE when the delay exceeds the threshold; NA when a date is missing
+          HAI        = adm.growth > hai_cai_thresh)