EPIC ACS Exploratory Data Analysis

Author

Jonathan Wade

Code
# Quarto setup chunk
library(tidyverse)
library(lubridate)
library(janitor)
library(broom)
library(cowplot)
library(survival)     # optional, for time-to-event
library(survminer)    # optional, for Kaplan-Meier plots
library(reshape2)     # for correlation heatmap (melt)
library(GGally)       # or corrplot or other packages, if you prefer

theme_set(theme_cowplot())  # consistent plot theme

1. Load and Clean Data

Code
# Source your updated script, e.g. "ACS_gpt.R", which:
#  - Reads/cleans all baseline data
#  - Defines objects like: master_cohort, acs_admissions, acs_labs,
#    acs_ip_meds, readmission_analysis, all_hu_patients, etc.
source("ACS_gpt.R")

=== Data Files Overview ===

File: IN495734_SCD_Bone_Marrow_Baseline.txt 
Rows: 104 
Columns: 9 
Column names: PATIENT_MRN, HSP_ACCOUNT_ID, ORDER_ID, RESULT_TIME, LAB_CODE, LAB_NAME, COMPONENT_ID, LAB_COMPONENT_DESCRIPTION, BONE_MARROW_RESULTS_BY_COMPONENT 

File: IN495734_SCD_Demographics_Social_Hx_Baseline.txt 
Rows: 6657 
Columns: 10 
Column names: PATIENT_MRN, PATIENT_NAME, BIRTH_DATE, AGE, GENDER, RACE, ETHNICITY, IS_TOBACCO_USER_YN, ALCOHOL_USER_YN, ILL_DRUG_USER_YN 

File: IN495734_SCD_IP_Admissions_Baseline.txt 
Rows: 11300 
Columns: 21 
Column names: PATIENT_MRN, HSP_ACCOUNT_ID, ADM_DATE_TIME, DISCH_DATE_TIME, DISCHARGE_DEPARTMENT, DISCHARGE_DISPOSITION, ICU_ADMISSION_YN, ADMIT_DX_CD_1, ADMIT_DX_DESCRIPTION_1, ADMIT_DX_CD_2, ADMIT_DX_DESCRIPTION_2, FINAL_DX_CD_1, FINAL_DX_DESCRIPTION_1, FINAL_DX_CD_2, FINAL_DX_DESCRIPTION_2, FINAL_DX_CD_3, FINAL_DX_DESCRIPTION_3, FINAL_DX_CD_4, FINAL_DX_DESCRIPTION_4, FINAL_DX_CD_5, FINAL_DX_DESCRIPTION_5                                                            

File: IN495734_SCD_IP_Meds_Baseline.txt 
Rows: 1328884 
Columns: 10 
Column names: PATIENT_MRN, HSP_ACCOUNT_ID, ADM_DATE_TIME, DISCH_DATE_TIME, MEDICATION, DOSAGE, UNIT, FREQUENCY, TAKEN_TIME, RX_CLASS_NAME 

File: IN495734_SCD_Labs_Baseline.txt 
Rows: 7742231 
Columns: 9 
Column names: PATIENT_MRN, PAT_ENC_CSN_ID, ORDER_TIME, PROC_CODE, PROC_NAME, COMPONENT_ID, LAB_COMPONENT_DESCRIPTION, LAB_RESULT_VALUE, RESULT_TIME 

File: IN495734_SCD_OP_AVS_Meds_Baseline.txt 
Rows: 3221 
Columns: 7 
Column names: PATIENT_MRN, HSP_ACCOUNT_ID, VISIT_DATE, ORDER_MED_ID, ORDER_DTTM, RX_STATUS, GENERIC_DESCRIPTION 

File: IN495734_SCD_OP_Visits_Baseline.txt 
Rows: 117938 
Columns: 13 
Column names: PATIENT_MRN, PAT_ID, HSP_ACCOUNT_ID, VISIT_DATE, VISIT_TYPE, DEPARTMENT_ID, DEPARTMENT_NAME, BP_SYSTOLIC, BP_DIASTOLIC, WEIGHT_LBS, WEIGHT_KG, CURRENT_ICD10_LIST, DX_NAME 

=== Demographic Summary (ACS) ===
# A tibble: 1 × 5
  total_acs_patients mean_age sd_age pct_female pct_tobacco
               <int>    <dbl>  <dbl>      <dbl>       <dbl>
1                336     31.3   9.64       42.8        18.1

=== ICU Summary (ACS) ===
# A tibble: 1 × 3
  total_acs_admissions icu_acs_admissions pct_icu
                 <int>              <int>   <dbl>
1                  602                278    46.2

=== Lab Summary (Partial) ===
# A tibble: 20 × 6
   lab_component_description  timing          n mean_value median_value sd_value
   <chr>                      <chr>       <int>      <dbl>        <dbl>    <dbl>
 1 ALBUMIN,BCG-SERUM          during_adm…  3889      3.62          3.6     0.619
 2 CREATININE-SERUM           during_adm…  3889      0.941         0.7     0.871
 3 POTASSIUM-SERUM            during_adm…  3889      4.28          4.2     0.682
 4 UREA NITROGEN-SERUM        during_adm…  3889     14.7          10      15.7  
 5 LDH,TOTAL-SERUM            during_adm…  3600    711.          512     724.   
 6 BILIRUBIN,DIRECT-SERUM     during_adm…  3415      2.07          0.9     4.02 
 7 BILIRUBIN,TOTAL-SERUM      during_adm…  3415      5.20          3.4     5.54 
 8 ALKALINE PHOSPHATASE SERUM during_adm…  3357    133.          103     104.   
 9 HEMATOCRIT-BLOOD           during_adm…  2908     23.1          22.8     4.69 
10 HEMOGLOBIN-BLOOD           during_adm…  2908      7.90          7.8     1.54 
11 MCHC                       during_adm…  2908     34.2          34.2     1.61 
12 MCV                        during_adm…  2908     91.6          90      10.0  
13 RBC COUNT-BLOOD            during_adm…  2908      2.56          2.52    0.622
14 RDW                        during_adm…  2908     22.3          21.8     4.02 
15 PLATELET COUNT-BLOOD       during_adm…  2566    348.          304     221.   
16 PLATELET MPV               during_adm…  2566      8.49          8.4     0.902
17 WBC COUNT-BLOOD            during_adm…  2566     15.9          14.6     7.16 
18 % NEUTROPHILS-MAN          during_adm…  1042     67.3          69      16.1  
19 ALBUMIN,BCG-SERUM          pre_admiss…   214      4.18          4.3     0.580
20 CREATININE-SERUM           pre_admiss…   214      0.777         0.7     0.603

=== IP Meds During ACS (Top 10) ===
# A tibble: 10 × 3
   rx_class_name           n_patients n_administrations
   <chr>                        <int>             <int>
 1 Analgesics-Narcotic            336             26220
 2 Hematopoietic Agents           330              5271
 3 Nutrients                      328              6740
 4 Laxatives                      327              9242
 5 Antihistamines                 312              4465
 6 Anticoagulants                 301              4227
 7 Minerals & Electrolytes        300              9701
 8 Anti-Rheumatic                 284              4259
 9 Cephalosporins                 278              2758
10 Antiemetics                    269              1686

=== OP Meds Overview (Top 10) ===
# A tibble: 10 × 4
   drug_name                                rx_status n_patients n_prescriptions
   <chr>                                    <chr>          <int>           <int>
 1 FOLIC ACID 1 MG PO TABS                  Resume           183             259
 2 HYDROXYUREA 500 MG PO CAPS               Resume           101             126
 3 CHOLECALCIFEROL-D3 1000 UNITS PO TABS    Resume            72             109
 4 VITAMIN D (ERGOCALCIFEROL) 1.25 MG (500… Resume            61              77
 5 ALBUTEROL SULFATE HFA 108 (90 BASE) MCG… Resume            55              75
 6 NALOXONE HCL 4 MG/0.1ML NA LIQD          Resume            53              70
 7 SENNOSIDES 8.6 MG PO TABS                Resume            49              58
 8 OXYCODONE-ACETAMINOPHEN 10-325 MG PO TA… Resume            41              51
 9 FLUTICASONE PROPIONATE 50 MCG/ACT NA SU… Resume            37              53
10 DOCUSATE SODIUM 100 MG PO CAPS           Resume            33              40

=== Length of Stay (ACS) ===
# A tibble: 1 × 5
  mean_los median_los sd_los min_los max_los
     <dbl>      <dbl>  <dbl>   <dbl>   <dbl>
1     8.10       7.08   5.51   0.384    47.7

=== Readmission Analysis (ACS) ===
# A tibble: 1 × 5
  total_acs_patients patients_with_readmits pct_with_30day_readmit
               <int>                  <int>                  <dbl>
1                336                    126                   3.27
# ℹ 2 more variables: pct_with_90day_readmit <dbl>,
#   mean_acs_admissions_per_patient <dbl>

=== Hydroxyurea Flag ===
Number of patients with any Hydroxyurea usage: 975 

=== T-test on LOS (HU vs. Non-HU) ===

    Welch Two Sample t-test

data:  length_of_stay by hydroxyurea_yn
t = -1.1902, df = 195.43, p-value = 0.2354
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
 -1.8503410  0.4575782
sample estimates:
 mean in group No mean in group Yes 
         8.033542          8.729923 


=== Logistic Regression (ICU ~ HU) ===
# A tibble: 2 × 7
  term              estimate std.error statistic p.value conf.low conf.high
  <chr>                <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl>
1 (Intercept)          0.765     0.184    -1.46    0.145    0.531      1.10
2 hydroxyurea_ynYes    1.15      0.206     0.699   0.485    0.773      1.73

=== Sample Plots ===

Code
# Quick checks
cat("Number of unique patients in 'master_cohort':", n_distinct(master_cohort$patient_mrn), "\n")
Number of unique patients in 'master_cohort': 6657 
Code
cat("Number of ACS admissions:", nrow(acs_admissions), "\n")
Number of ACS admissions: 552 

1.1 Quick Summaries from Revised Script

Code
library(knitr)

cat("\n=== Demographic Summary (ACS) ===\n")

=== Demographic Summary (ACS) ===
Code
kable(demographic_summary)
total_acs_patients mean_age sd_age pct_female pct_tobacco
336 31.2648 9.644867 42.75362 18.11594
Code
cat("\n=== ICU Summary (ACS) ===\n")

=== ICU Summary (ACS) ===
Code
kable(icu_summary)
total_acs_admissions icu_acs_admissions pct_icu
602 278 46.1794
Code
cat("\n=== IP Meds During ACS (Top 10) ===\n")

=== IP Meds During ACS (Top 10) ===
Code
kable(head(acs_meds_summary, 10))
rx_class_name n_patients n_administrations
Analgesics-Narcotic 336 26220
Hematopoietic Agents 330 5271
Nutrients 328 6740
Laxatives 327 9242
Antihistamines 312 4465
Anticoagulants 301 4227
Minerals & Electrolytes 300 9701
Anti-Rheumatic 284 4259
Cephalosporins 278 2758
Antiemetics 269 1686
Code
cat("\n=== OP Meds Overview (Top 10) ===\n")

=== OP Meds Overview (Top 10) ===
Code
kable(head(op_meds_summary, 10))
drug_name rx_status n_patients n_prescriptions
FOLIC ACID 1 MG PO TABS Resume 183 259
HYDROXYUREA 500 MG PO CAPS Resume 101 126
CHOLECALCIFEROL-D3 1000 UNITS PO TABS Resume 72 109
VITAMIN D (ERGOCALCIFEROL) 1.25 MG (50000 UT) PO CAPS Resume 61 77
ALBUTEROL SULFATE HFA 108 (90 BASE) MCG/ACT IN AERS Resume 55 75
NALOXONE HCL 4 MG/0.1ML NA LIQD Resume 53 70
SENNOSIDES 8.6 MG PO TABS Resume 49 58
OXYCODONE-ACETAMINOPHEN 10-325 MG PO TABS Resume 41 51
FLUTICASONE PROPIONATE 50 MCG/ACT NA SUSP Resume 37 53
DOCUSATE SODIUM 100 MG PO CAPS Resume 33 40
Code
cat("\n=== Length of Stay (ACS) ===\n")

=== Length of Stay (ACS) ===
Code
kable(los_analysis)
mean_los median_los sd_los min_los max_los
8.100267 7.084028 5.513333 0.3840278 47.7
Code
cat("\n=== Readmission Analysis (ACS) ===\n")

=== Readmission Analysis (ACS) ===
Code
kable(readmission_analysis)
total_acs_patients patients_with_readmits pct_with_30day_readmit pct_with_90day_readmit mean_acs_admissions_per_patient
336 126 3.27381 10.11905 1.642857
Code
cat("\n=== Hydroxyurea Flag ===\n")

=== Hydroxyurea Flag ===
Code
cat("Number of patients with any Hydroxyurea usage:", nrow(all_hu_patients), "\n")
Number of patients with any Hydroxyurea usage: 975 
Code
cat("\n=== T-test on LOS (HU vs. Non-HU) ===\n")

=== T-test on LOS (HU vs. Non-HU) ===
Code
# t_test_hu_los is a test result list; print() might be appropriate
print(t_test_hu_los)

    Welch Two Sample t-test

data:  length_of_stay by hydroxyurea_yn
t = -1.1902, df = 195.43, p-value = 0.2354
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
 -1.8503410  0.4575782
sample estimates:
 mean in group No mean in group Yes 
         8.033542          8.729923 
Code
cat("\n=== Logistic Regression (ICU ~ HU) ===\n")

=== Logistic Regression (ICU ~ HU) ===
Code
kable(hu_icu_model_res)
term estimate std.error statistic p.value conf.low conf.high
(Intercept) 0.7647059 0.1842161 -1.4562461 0.1453246 0.5308746 1.095137
hydroxyurea_ynYes 1.1544471 0.2055884 0.6985878 0.4848097 0.7727603 1.732530

Interpretation: Reprints core EDA summaries from revised_prelim.R.


2. Full SCD Cohort: Demographic Visualizations

Below we visualize the full SCD cohort (unique MRNs).

Code
cohort_demo <- master_cohort %>%
  mutate(tobacco_user = if_else(is_tobacco_user_yn == "Yes", "Yes", "No/Unknown"))

## 2a. Age Distribution
ggplot(cohort_demo, aes(x = age)) +
  geom_histogram(binwidth = 5, fill = "steelblue", color = "white") +
  labs(
    title = "Age Distribution (Full SCD Cohort)",
    x = "Age (years)",
    y = "Count of Patients"
  )

Code
## 2b. Gender Distribution
ggplot(cohort_demo, aes(x = gender)) +
  geom_bar(fill = "darkorange") +
  labs(
    title = "Gender Distribution (Full SCD Cohort)",
    x = "Gender",
    y = "Number of Patients"
  )

Code
## 2c. Race Distribution
ggplot(cohort_demo, aes(x = race)) +
  geom_bar(fill = "forestgreen") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Race Distribution (Full SCD Cohort)",
    x = "Race",
    y = "Number of Patients"
  )

Code
# Boxplot of Age by Gender
ggplot(cohort_demo, aes(x = gender, y = age)) +
  geom_boxplot(fill = "lightblue", outlier.color = "red") +
  labs(
    title = "Age by Gender (Full SCD Cohort)",
    x = "Gender",
    y = "Age (years)"
  ) +
  theme_cowplot()


3. ACS Subset: Demographic Distributions

Focus on unique patients who had at least one ACS admission.

Code
acs_unique_patients <- acs_admissions %>%
  distinct(patient_mrn) %>%
  left_join(master_cohort, by = "patient_mrn") %>%
  mutate(tobacco_user = if_else(is_tobacco_user_yn == "Yes", "Yes", "No/Unknown"))

## 3a. Age Distribution: ACS Patients
ggplot(acs_unique_patients, aes(x = age)) +
  geom_histogram(binwidth = 5, fill = "purple", color = "white") +
  labs(
    title = "Age Distribution: ACS Patients",
    x = "Age (years)",
    y = "Count"
  )

Code
## 3b. Gender Distribution: ACS Patients
ggplot(acs_unique_patients, aes(x = gender)) +
  geom_bar(fill = "tomato") +
  labs(
    title = "Gender Distribution: ACS Patients",
    x = "Gender",
    y = "Number of Patients"
  )


4. ICU Analysis for ACS

4.1 High-Level ICU Summary

Code
kable(icu_summary)
total_acs_admissions icu_acs_admissions pct_icu
602 278 46.1794

Interpretation: Typically shows total ACS admissions and how many ended up in ICU.

4.2 ICU vs. Non-ICU: Length of Stay

Code
# If not already defined in the script, define acs_icu_los:
acs_icu_los <- acs_admissions %>%
  left_join(
    voe_acs_admissions %>%
      filter(has_acs) %>%
      select(patient_mrn, adm_date_time, icu_admission_yn),
    by = c("patient_mrn", "adm_date_time")
  ) %>%
  mutate(
    icu_flag = (icu_admission_yn == "Yes"),
    length_of_stay = as.numeric(difftime(disch_date_time, adm_date_time, units = "days"))
  )

icu_ttest <- t.test(length_of_stay ~ icu_flag, data = acs_icu_los)
icu_ttest

    Welch Two Sample t-test

data:  length_of_stay by icu_flag
t = -7.4902, df = 444.17, p-value = 3.742e-13
alternative hypothesis: true difference in means between group FALSE and group TRUE is not equal to 0
95 percent confidence interval:
 -4.611836 -2.694715
sample estimates:
mean in group FALSE  mean in group TRUE 
           6.904049           10.557324 

5. Readmission Analysis

5.1 ICU vs. Non-ICU Readmissions

Code
# If 'readmit_compare' not defined, define it here:
acs_readmit_icu <- acs_admissions %>%
  arrange(patient_mrn, adm_date_time) %>%
  group_by(patient_mrn) %>%
  mutate(
    next_admission = lead(adm_date_time),
    days_to_next   = as.numeric(difftime(next_admission, disch_date_time, units = "days"))
  ) %>%
  ungroup() %>%
  left_join(
    voe_acs_admissions %>%
      filter(has_acs) %>%
      select(patient_mrn, adm_date_time, icu_admission_yn),
    by = c("patient_mrn", "adm_date_time")
  ) %>%
  mutate(icu_flag = (icu_admission_yn == "Yes"))

readmit_compare <- acs_readmit_icu %>%
  group_by(icu_flag) %>%
  summarise(
    mean_days_to_next   = mean(days_to_next, na.rm = TRUE),
    median_days_to_next = median(days_to_next, na.rm = TRUE),
    readmit_rate_30d    = mean(days_to_next <= 30, na.rm = TRUE) * 100,
    readmit_rate_90d    = mean(days_to_next <= 90, na.rm = TRUE) * 100,
    .groups = "drop"
  )

kable(readmit_compare)
icu_flag mean_days_to_next median_days_to_next readmit_rate_30d readmit_rate_90d
FALSE 495.3241 299.0837 5.468750 23.43750
TRUE 489.2667 316.8174 4.807692 23.07692

Interpretation: Checks if ICU vs. Non-ICU admissions come back sooner or more often.


6. Extended Lab Analysis

6.1 Spaghetti Plots for Multiple ACS (Hemoglobin Example)

Code
## Multiple ACS admissions
acs_multi <- acs_admissions %>%
  group_by(patient_mrn) %>%
  filter(n() > 1) %>%
  ungroup()

# Earliest ACS date per patient
acs_earliest <- acs_multi %>%
  group_by(patient_mrn) %>%
  summarise(first_acs_date = min(adm_date_time, na.rm = TRUE), .groups="drop")

hgb_spaghetti <- acs_earliest %>%
  left_join(acs_labs, by="patient_mrn") %>%
  filter(lab_component_description == "HEMOGLOBIN-BLOOD") %>%
  mutate(
    hgb_value = as.numeric(lab_result_value),
    days_from_first_acs = as.numeric(difftime(result_time, first_acs_date, units="days"))
  ) %>%
  filter(!is.na(hgb_value))

ggplot(hgb_spaghetti, aes(x = days_from_first_acs, y = hgb_value, group = patient_mrn)) +
  geom_line(alpha=0.3) +
  geom_smooth(aes(group=1), method="loess", se=FALSE, color="red") +
  labs(
    title="Longitudinal Hemoglobin Trends (Multiple ACS Patients)",
    x="Days from First ACS Admission",
    y="Hemoglobin (g/dL)"
  ) +
  theme_bw()


7. Medications Beyond Hydroxyurea

7.1 Antibiotic Analysis

Code
antibiotic_summary <- acs_ip_meds %>%
  filter(grepl("(?i)cef|azithro|levo|vanco|penicillin", medication)) %>%
  group_by(medication) %>%
  summarise(
    n_patients = n_distinct(patient_mrn),
    n_admins   = n(),
    .groups="drop"
  ) %>%
  arrange(desc(n_patients))

ggplot(antibiotic_summary %>% head(10), aes(x=reorder(medication, n_patients), y=n_patients)) +
  geom_bar(stat="identity", fill="darkblue") +
  coord_flip() +
  labs(
    title="Top Antibiotics Used in ACS Admissions",
    x="Medication",
    y="Number of Patients"
  ) +
  theme_cowplot()

7.2 Opioid vs. Non-Opioid Analgesics

Code
opioid_data <- acs_ip_meds %>%
  mutate(
    is_opioid = if_else(grepl("(?i)narcotic", rx_class_name), "Opioid", "Non-Opioid")
  ) %>%
  group_by(is_opioid) %>%
  summarise(
    n_patients_acs = n_distinct(patient_mrn),
    total_admins    = n(),
    .groups="drop"
  )

kable(opioid_data)
is_opioid n_patients_acs total_admins
Non-Opioid 336 74540
Opioid 336 29120

Interpretation: Check correlation with ICU or LOS if desired.


8. Additional Statistical Tests & Specific Drug Subcategories

Code
# Define patterns for certain opioids & NSAIDs
opioid_patterns <- c("(?i)morphine", "(?i)hydromorphone", "(?i)fentanyl", "(?i)oxycodone", "(?i)hydrocodone")
nsaid_patterns <- c("(?i)ibuprofen", "(?i)ketorolac", "(?i)naproxen", "(?i)meloxicam", "(?i)diclofenac")
acetamin_pattern <- "(?i)acetaminophen|paracetamol"

acs_ip_meds_specific <- acs_admissions %>%
  left_join(acs_ip_meds, by=c("patient_mrn","adm_date_time")) %>%
  filter(!is.na(medication)) %>%
  mutate(
    med_lower = tolower(medication),
    pain_class = case_when(
      str_detect(med_lower, regex(paste(opioid_patterns,collapse="|"), ignore_case=TRUE)) ~ "Opioid",
      str_detect(med_lower, regex(paste(nsaid_patterns,collapse="|"), ignore_case=TRUE))  ~ "NSAID",
      str_detect(med_lower, regex(acetamin_pattern, ignore_case=TRUE))                    ~ "Acetaminophen",
      TRUE ~ "Other"
    )
  )

pain_class_usage <- acs_ip_meds_specific %>%
  group_by(pain_class) %>%
  summarise(
    admissions_with_class = n_distinct(paste(patient_mrn, adm_date_time)),
    .groups="drop"
  ) %>%
  arrange(desc(admissions_with_class))

kable(pain_class_usage)
pain_class admissions_with_class
Other 552
Opioid 546
NSAID 439
Acetaminophen 368
Code
acs_pain_group <- acs_ip_meds_specific %>%
  group_by(patient_mrn, adm_date_time) %>%
  summarise(
    used_opioid   = any(pain_class=="Opioid"),
    used_nsaid    = any(pain_class=="NSAID"),
    used_acetamin = any(pain_class=="Acetaminophen"),
    .groups="drop"
  ) %>%
  mutate(
    pain_pattern = case_when(
      used_opioid & !used_nsaid & !used_acetamin ~ "Opioid_only",
      used_nsaid  & !used_opioid & !used_acetamin ~ "NSAID_only",
      used_acetamin & !used_opioid & !used_nsaid  ~ "Acetamin_only",
      (used_opioid & used_nsaid) | (used_opioid & used_acetamin) | (used_nsaid & used_acetamin) ~ "Mixed",
      TRUE ~ "None"
    )
  )

acs_pain_los <- acs_admissions %>%
  mutate(
    length_of_stay = as.numeric(difftime(disch_date_time, adm_date_time, units="days"))
  ) %>%
  left_join(acs_pain_group, by=c("patient_mrn","adm_date_time")) %>%
  filter(!is.na(length_of_stay))

kruskal_los <- kruskal.test(length_of_stay ~ pain_pattern, data=acs_pain_los)
kable(tibble(Statistic = kruskal_los$statistic,
             DF = kruskal_los$parameter,
             P_Value = kruskal_los$p.value))
Statistic DF P_Value
17.41987 3 0.0005792
Code
acs_icu_model_data <- acs_admissions %>%
  left_join(
    voe_acs_admissions %>%
      filter(has_acs) %>%
      select(patient_mrn, adm_date_time, icu_admission_yn),
    by=c("patient_mrn","adm_date_time")
  ) %>%
  left_join(acs_pain_group, by=c("patient_mrn","adm_date_time")) %>%
  left_join(master_cohort_hu %>% select(patient_mrn, hydroxyurea_yn), by="patient_mrn") %>%
  mutate(
    icu_flag = icu_admission_yn == "Yes"
  ) %>%
  left_join(
    acs_labs %>% 
      filter(lab_component_description=="WBC COUNT-BLOOD", timing=="during_admission") %>%
      group_by(patient_mrn, adm_date_time) %>%
      summarise(wbc_adm=median(as.numeric(lab_result_value),na.rm=TRUE),.groups="drop"),
    by=c("patient_mrn","adm_date_time")
  ) %>%
  filter(!is.na(icu_flag))

pain_icu_glm <- glm(
  icu_flag ~ pain_pattern + wbc_adm + hydroxyurea_yn,
  data=acs_icu_model_data,
  family=binomial
)

kable(tidy(pain_icu_glm, conf.int=TRUE, exponentiate=TRUE))
term estimate std.error statistic p.value conf.low conf.high
(Intercept) 1.187000e+00 1.2077686 0.1419384 0.8871287 0.1350613 25.429226
pain_patternMixed 3.137978e-01 1.1606053 -0.9986224 0.3179777 0.0154253 2.483261
pain_patternNone 3.262090e+05 535.4124593 0.0237112 0.9810829 0.0000000 NA
pain_patternOpioid_only 2.064155e-01 1.2107474 -1.3032149 0.1925014 0.0095917 1.818203
wbc_adm 1.041375e+00 0.0150420 2.6952372 0.0070339 1.0114947 1.073098
hydroxyurea_ynYes 1.314663e+00 0.2132605 1.2828469 0.1995457 0.8677014 2.004962
Code
acs_discharge_labs <- acs_admissions %>%
  left_join(clean_data_list$labs, by="patient_mrn") %>%
  filter(
    !is.na(result_time),
    result_time >= (disch_date_time - ddays(1)),
    result_time <= (disch_date_time + ddays(1)),
    lab_component_description=="HEMOGLOBIN-BLOOD"
  ) %>%
  group_by(patient_mrn, adm_date_time) %>%
  summarise(
    discharge_hgb = mean(as.numeric(lab_result_value),na.rm=TRUE),
    .groups="drop"
  )

acs_readmit_details <- acs_admissions %>%
  arrange(patient_mrn, adm_date_time) %>%
  group_by(patient_mrn) %>%
  mutate(
    next_adm       = lead(adm_date_time),
    days_to_next   = as.numeric(difftime(next_adm, disch_date_time,units="days")),
    is_30day_readm = !is.na(days_to_next) & days_to_next<=30
  ) %>%
  ungroup() %>%
  select(patient_mrn, adm_date_time, is_30day_readm, days_to_next)

acs_readmit_labs <- acs_discharge_labs %>%
  left_join(acs_readmit_details, by=c("patient_mrn","adm_date_time")) %>%
  mutate(hgb_lt7 = discharge_hgb<7)

kable(table(acs_readmit_labs$hgb_lt7, acs_readmit_labs$is_30day_readm))
FALSE TRUE
FALSE 173 1
TRUE 36 0

9. Lab Values Before, During, and After Discharge (Specific List)

Below, we create a dedicated analysis for labs around ACS admissions: pre (±X days before), during, and post (±X days after). We’ll focus on these specific labs (feel free to expand):

- RBC COUNT-BLOOD
- HEMATOCRIT-BLOOD
- HEMOGLOBIN-BLOOD
- MCV
- MCHC
- RDW
- WBC COUNT-BLOOD
- PLATELET COUNT-BLOOD
- LDH,TOTAL-SERUM
- BILIRUBIN,DIRECT-SERUM
- BILIRUBIN,TOTAL-SERUM
- CREATININE-SERUM
- UREA NITROGEN-SERUM
- ALBUMIN,BCG-SERUM
- ALKALINE PHOSPHATASE SERUM
- POTASSIUM-SERUM
Code
my_labs_of_interest <- c(
  "RBC COUNT-BLOOD", "HEMATOCRIT-BLOOD", "HEMOGLOBIN-BLOOD", "MCV", "MCHC", "RDW",
  "WBC COUNT-BLOOD", "PLATELET COUNT-BLOOD", "LDH,TOTAL-SERUM",
  "BILIRUBIN,DIRECT-SERUM", "BILIRUBIN,TOTAL-SERUM", 
  "CREATININE-SERUM", "UREA NITROGEN-SERUM",
  "ALBUMIN,BCG-SERUM", "ALKALINE PHOSPHATASE SERUM", "POTASSIUM-SERUM"
)

acs_labs_3windows <- acs_admissions %>%
  # If not defined, do a new left_join with labs ±(7 or X days):
  left_join(clean_data_list$labs, by="patient_mrn") %>%
  filter(
    !is.na(result_time),
    result_time >= adm_date_time - days(7),
    result_time <= disch_date_time + days(7),
    lab_component_description %in% my_labs_of_interest
  ) %>%
  mutate(
    timing = case_when(
      result_time < adm_date_time ~ "pre_admission",
      result_time > disch_date_time ~ "post_discharge",
      TRUE ~ "during_admission"
    ),
    lab_value_num = as.numeric(lab_result_value)
  )

# Summaries of each lab by timing
lab_3win_summary <- acs_labs_3windows %>%
  group_by(lab_component_description, timing) %>%
  summarise(
    n = n(),
    mean_val  = mean(lab_value_num, na.rm=TRUE),
    median_val= median(lab_value_num, na.rm=TRUE),
    sd_val    = sd(lab_value_num, na.rm=TRUE),
    .groups="drop"
  ) %>%
  arrange(lab_component_description, timing)

kable(lab_3win_summary)
lab_component_description timing n mean_val median_val sd_val
ALBUMIN,BCG-SERUM during_admission 3889 3.6161481 3.600 0.6193190
ALBUMIN,BCG-SERUM post_discharge 144 3.8347222 4.000 0.6969784
ALBUMIN,BCG-SERUM pre_admission 214 4.1761682 4.300 0.5801427
ALKALINE PHOSPHATASE SERUM during_admission 3357 133.0545130 103.000 103.6235667
ALKALINE PHOSPHATASE SERUM post_discharge 129 127.8372093 104.000 84.0500029
ALKALINE PHOSPHATASE SERUM pre_admission 203 104.1871921 90.000 56.3670295
BILIRUBIN,DIRECT-SERUM during_admission 3415 2.0744431 0.900 4.0248201
BILIRUBIN,DIRECT-SERUM post_discharge 129 1.1682171 0.600 1.3706741
BILIRUBIN,DIRECT-SERUM pre_admission 203 1.0359606 0.700 1.1488856
BILIRUBIN,TOTAL-SERUM during_admission 3415 5.1983304 3.400 5.5385367
BILIRUBIN,TOTAL-SERUM post_discharge 130 3.3915385 2.600 2.5615534
BILIRUBIN,TOTAL-SERUM pre_admission 203 4.3403941 3.600 2.9847706
CREATININE-SERUM during_admission 3889 0.9405357 0.700 0.8711413
CREATININE-SERUM post_discharge 143 1.0328671 0.700 1.3097025
CREATININE-SERUM pre_admission 214 0.7766355 0.700 0.6032898
HEMATOCRIT-BLOOD during_admission 2908 23.1075465 22.800 4.6868568
HEMATOCRIT-BLOOD post_discharge 137 24.8264706 24.600 4.6377395
HEMATOCRIT-BLOOD pre_admission 197 23.9852792 23.800 4.8161968
HEMOGLOBIN-BLOOD during_admission 2908 7.8961061 7.800 1.5371921
HEMOGLOBIN-BLOOD post_discharge 137 8.3291971 8.300 1.5221483
HEMOGLOBIN-BLOOD pre_admission 197 8.2675127 8.200 1.5665380
LDH,TOTAL-SERUM during_admission 3600 711.0873922 512.000 724.1584738
LDH,TOTAL-SERUM post_discharge 108 490.7129630 433.000 264.5053592
LDH,TOTAL-SERUM pre_admission 198 492.8010204 402.500 293.7220668
MCHC during_admission 2908 34.2468987 34.200 1.6097993
MCHC post_discharge 137 33.5323529 33.600 1.3021852
MCHC pre_admission 197 34.5741117 34.600 1.6208746
MCV during_admission 2908 91.5767057 90.000 10.0098983
MCV post_discharge 137 95.5955882 93.500 13.0386199
MCV pre_admission 197 94.5634518 94.000 10.9545575
PLATELET COUNT-BLOOD during_admission 2566 348.0983991 304.000 221.1934790
PLATELET COUNT-BLOOD post_discharge 110 532.3454545 473.500 377.4113283
PLATELET COUNT-BLOOD pre_admission 185 353.9783784 342.000 150.6929991
POTASSIUM-SERUM during_admission 3889 4.2838005 4.200 0.6824142
POTASSIUM-SERUM post_discharge 143 4.3447552 4.300 0.5760084
POTASSIUM-SERUM pre_admission 214 4.3257009 4.300 0.5964238
RBC COUNT-BLOOD during_admission 2908 2.5623088 2.520 0.6217601
RBC COUNT-BLOOD post_discharge 137 2.6639706 2.665 0.6626292
RBC COUNT-BLOOD pre_admission 197 2.5787310 2.550 0.6223211
RDW during_admission 2908 22.3091097 21.800 4.0178012
RDW post_discharge 137 22.8632353 22.700 4.6055170
RDW pre_admission 197 23.3502538 22.900 4.4443799
UREA NITROGEN-SERUM during_admission 3889 14.7375193 10.000 15.7399905
UREA NITROGEN-SERUM post_discharge 143 13.1258741 10.000 10.0791705
UREA NITROGEN-SERUM pre_admission 214 10.5046729 8.000 7.8577420
WBC COUNT-BLOOD during_admission 2566 15.9025391 14.600 7.1561300
WBC COUNT-BLOOD post_discharge 110 12.8645455 11.350 5.8863485
WBC COUNT-BLOOD pre_admission 185 14.1854054 13.100 5.1975384

Interpretation: This table shows each lab’s counts and basic stats for pre, during, and post admission windows.

10.1 Kruskal-Wallis Across Pre/During/Post for Each Lab

Code
kruskal_results <- acs_labs_3windows %>%
  group_by(lab_component_description) %>%
  filter(n() >= 30) %>%  # minimal sample size
  nest() %>%
  mutate(
    kw = map(data, ~ kruskal.test(lab_value_num ~ timing, data=.x) %>% broom::tidy())
  ) %>%
  select(lab_component_description, kw) %>%
  unnest(kw)

kable(kruskal_results)
lab_component_description statistic p.value parameter method
BILIRUBIN,TOTAL-SERUM 17.419454 0.0001650 2 Kruskal-Wallis rank sum test
BILIRUBIN,DIRECT-SERUM 35.887930 0.0000000 2 Kruskal-Wallis rank sum test
CREATININE-SERUM 5.529431 0.0629940 2 Kruskal-Wallis rank sum test
POTASSIUM-SERUM 5.867765 0.0531901 2 Kruskal-Wallis rank sum test
UREA NITROGEN-SERUM 11.131272 0.0038271 2 Kruskal-Wallis rank sum test
ALBUMIN,BCG-SERUM 180.268424 0.0000000 2 Kruskal-Wallis rank sum test
ALKALINE PHOSPHATASE SERUM 14.429187 0.0007358 2 Kruskal-Wallis rank sum test
WBC COUNT-BLOOD 32.810935 0.0000001 2 Kruskal-Wallis rank sum test
RBC COUNT-BLOOD 4.067648 0.1308343 2 Kruskal-Wallis rank sum test
HEMOGLOBIN-BLOOD 21.066543 0.0000266 2 Kruskal-Wallis rank sum test
HEMATOCRIT-BLOOD 23.338328 0.0000086 2 Kruskal-Wallis rank sum test
RDW 12.496788 0.0019336 2 Kruskal-Wallis rank sum test
MCHC 37.066304 0.0000000 2 Kruskal-Wallis rank sum test
MCV 30.006108 0.0000003 2 Kruskal-Wallis rank sum test
PLATELET COUNT-BLOOD 50.299044 0.0000000 2 Kruskal-Wallis rank sum test
LDH,TOTAL-SERUM 40.984612 0.0000000 2 Kruskal-Wallis rank sum test

Interpretation: For each lab, we run a Kruskal-Wallis test comparing the 3 timing groups. If p.value < 0.05, at least one group differs significantly.

(You could do pairwise Wilcoxon tests next.)

Code
# Example for just RBC COUNT-BLOOD
rbc_data <- acs_labs_3windows %>% 
  filter(lab_component_description == "RBC COUNT-BLOOD") 
pairwise.wilcox.test(rbc_data$lab_value_num, rbc_data$timing)

Aditional Visuals:

Code
# Let's focus on labs "during_admission" only
lab_during <- acs_labs_3windows %>%
  filter(timing == "during_admission") %>%
  select(patient_mrn, lab_component_description, lab_value_num) %>%
  filter(!is.na(lab_value_num))

# Pivot wide: each row = patient_mrn, each column = lab
lab_during_wide <- lab_during %>%
  group_by(patient_mrn, lab_component_description) %>%
  summarise(value = median(lab_value_num, na.rm=TRUE), .groups="drop") %>%
  pivot_wider(
    names_from = lab_component_description,
    values_from = value
  ) %>%
  ungroup() %>%
  select(-patient_mrn)  # we just want numeric columns

# Calculate correlation matrix
corr_mat <- cor(lab_during_wide, use="pairwise.complete.obs", method="spearman")

# Convert to a molten DF for ggplot, or use a helper library like ggcorrplot
corr_df <- melt(corr_mat, varnames = c("lab1","lab2"), value.name="corr_value")

ggplot(corr_df, aes(x=lab1, y=lab2, fill=corr_value)) +
  geom_tile() +
  scale_fill_gradient2(low = "blue", mid="white", high="red", midpoint = 0) +
  theme(axis.text.x = element_text(angle=45, hjust=1)) +
  labs(
    title="Spearman Correlation of Selected Labs (During ACS)",
    x="Lab Component",
    y="Lab Component",
    fill="Corr"
  ) +
  coord_fixed()

Code
# Merge ICU flag into the labs data
labs_3windows_icu <- acs_labs_3windows %>%
  # Join to get icu_admission_yn from the VOE+ACS admissions table
  left_join(
    voe_acs_admissions %>%
      filter(has_acs) %>%
      select(patient_mrn, adm_date_time, icu_admission_yn),
    by = c("patient_mrn", "adm_date_time")
  ) %>%
  mutate(
    icu_flag = (icu_admission_yn == "Yes")  # TRUE if ICU admission
  )

# Boxplot for all ACS patients (ICU + non-ICU combined)
ggplot(labs_3windows_icu, aes(x = timing, y = lab_value_num)) +
  geom_boxplot(fill = "lightblue", outlier.color = "red") +
  facet_wrap(~ lab_component_description, scales = "free_y") +
  labs(
    title = "Labs Pre-, During-, Post-Admission (All ACS Patients)",
    x = "Timing",
    y = "Lab Value"
  ) +
  theme_cowplot() +
  theme(
    strip.text = element_text(size = 8),   # smaller facet labels if needed
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

Code
# Boxplot for ICU patients only
ggplot(
  labs_3windows_icu %>% filter(icu_flag == TRUE),
  aes(x = timing, y = lab_value_num)
) +
  geom_boxplot(fill = "tomato", outlier.color = "red") +
  facet_wrap(~ lab_component_description, scales = "free_y") +
  labs(
    title = "Labs Pre-, During-, Post-Admission (ICU Patients Only)",
    x = "Timing",
    y = "Lab Value"
  ) +
  theme_cowplot() +
  theme(
    strip.text = element_text(size = 8),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

Code
# Suppose we want to see RBC vs. WBC "during_admission"
rbc_wbc <- acs_labs_3windows %>%
  filter(timing == "during_admission") %>%
  filter(lab_component_description %in% c("RBC COUNT-BLOOD", "WBC COUNT-BLOOD")) %>%
  select(patient_mrn, adm_date_time, lab_component_description, lab_value_num) %>%
  pivot_wider(
    names_from  = lab_component_description,
    values_from = lab_value_num,
    # Summarize duplicates by mean, median, max, etc.
    values_fn   = ~ mean(.x, na.rm=TRUE)
  ) %>%
  rename(
    RBC = `RBC COUNT-BLOOD`,
    WBC = `WBC COUNT-BLOOD`
  )
ggplot(rbc_wbc, aes(x = RBC, y = WBC)) +
  geom_point(alpha=0.5, color="darkblue") +
  geom_smooth(method="lm", se=FALSE, color="red") +
  labs(
    title="RBC vs. WBC (During Admission)",
    x="RBC Count (x10^6/µL)",
    y="WBC Count (x10^3/µL)"
  ) +
  theme_cowplot()