# Load necessary libraries
library(survey) # For handling complex survey design
## Warning: package 'survey' was built under R version 4.3.3
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(mice) # For multiple imputation of missing data
## Warning: package 'mice' was built under R version 4.3.3
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(dplyr) # For data manipulation
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2) # For data visualization
## Warning: package 'ggplot2' was built under R version 4.3.3
library(psych) # For reliability analysis
## Warning: package 'psych' was built under R version 4.3.3
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(caret) # For resampling and imbalanced data handling
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 4.3.3
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
library(knitr) # For dynamic report generation
## Warning: package 'knitr' was built under R version 4.3.3
library(lattice) # For advanced data visualization
library(tidyverse) # For data manipulation and visualization
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%() masks ggplot2::%+%()
## ✖ psych::alpha() masks ggplot2::alpha()
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks mice::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::lift() masks caret::lift()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(likert) # For Likert scale analysis
## Warning: package 'likert' was built under R version 4.3.3
## Loading required package: xtable
## Warning: package 'xtable' was built under R version 4.3.3
##
## Attaching package: 'likert'
##
## The following object is masked from 'package:dplyr':
##
## recode
library(MASS) # For advanced statistical analysis
## Warning: package 'MASS' was built under R version 4.3.3
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(viridis) # For color-blind friendly visualization
## Warning: package 'viridis' was built under R version 4.3.3
## Loading required package: viridisLite
## Warning: package 'viridisLite' was built under R version 4.3.3
library(here) # For managing file paths
## Warning: package 'here' was built under R version 4.3.3
## here() starts at C:/nick/practice
library(flextable) # For creating flexible tables
## Warning: package 'flextable' was built under R version 4.3.3
##
## Attaching package: 'flextable'
##
## The following object is masked from 'package:xtable':
##
## align
##
## The following object is masked from 'package:purrr':
##
## compose
library(devtools) # For installing and managing packages from GitHub
## Warning: package 'devtools' was built under R version 4.3.3
## Loading required package: usethis
## Warning: package 'usethis' was built under R version 4.3.3
library(GPArotation) # For factor rotation
## Warning: package 'GPArotation' was built under R version 4.3.3
##
## Attaching package: 'GPArotation'
##
## The following objects are masked from 'package:psych':
##
## equamax, varimin
library(ufs) # For user-friendly science
# Import the CSV file into R
survey_data <- read.csv("C:/nick/practice/traumadata/traumaData.csv")
# View the structure of the imported data
str(survey_data)
## 'data.frame': 256 obs. of 53 variables:
## $ code : int 1 2 3 4 5 6 7 8 9 10 ...
## $ county : chr "Migori" "Migori" "Migori" "Migori" ...
## $ time : chr "endline" "endline" "endline" "endline" ...
## $ group : chr "control" "control" "control" "control" ...
## $ age : int 30 38 34 28 32 41 22 29 33 45 ...
## $ gender : chr "male" "male" "female" "female" ...
## $ mstatus : chr "married" "married" "married" "single" ...
## $ edulevel : chr "secondary" "secondary" "secondary" "college" ...
## $ occup : chr "mason" "tailor" "business" "business" ...
## $ dateinjury : chr "20/8/2022" "15/8/2022" "2/9/2022" "8/9/2022" ...
## $ timeinj : chr "1500hrs" "1000hrs" "1300hrs" "0900hrs" ...
## $ MOI : chr "fall" "rti" "rti" "stab" ...
## $ anatlocation : chr "head" "chest" "extremities, pelvis" "chest" ...
## $ ISS : int 16 15 15 15 15 15 15 16 15 16 ...
## $ admdate : chr "20/8/2022" "15/8/2022" "2/9/2022" "8/9/2022" ...
## $ arrivaltime : chr "1530hrs" "1040hrs" "1400hrs" "1000hrs" ...
## $ arrivalmode : chr "car" "car" "motorbike" "taxi" ...
## $ refby : chr "self" "self" "self" "nurse" ...
## $ receiveby : chr "rco" "nurse" "nurse" "mo" ...
## $ triagecategory : chr "urgent" "urgent" "urgent" "immediate" ...
## $ timeassmnt : chr "1550hrs" "1055hrs" "1430hrs" "1025hrs" ...
## $ Durationmins : int 20 15 30 25 20 25 30 30 30 40 ...
## $ state : chr "alive" "alive" "alive" "alive" ...
## $ HRinitial : int 80 88 75 90 80 95 90 90 86 105 ...
## $ SBPinitial : int 120 130 125 130 120 125 115 110 130 110 ...
## $ DBPinitial : int 80 76 80 80 75 74 78 80 60 70 ...
## $ RRinitial : int 20 18 22 20 22 21 18 18 22 20 ...
## $ SPO2initial : int 94 93 90 95 95 93 94 94 90 95 ...
## $ painscore : int 10 10 10 9 10 10 8 9 10 10 ...
## $ GCS : int 14 13 15 15 13 12 15 13 14 15 ...
## $ AVPU : chr "a" "v" "a" "a" ...
## $ airwaystatus : chr "patent" "patent" "patent" "patent" ...
## $ airwayinterven : chr "none" "none" "none" "none" ...
## $ spinestabilized : chr "no" "no" "no" "no" ...
## $ breathingstatus : chr "spont" "spont" "spont" "spont" ...
## $ breathinginterven: chr "none" "none" "none" "none" ...
## $ bleedingcontrol : chr "IVF" "bandage" "pressure" "bandage" ...
## $ Fluidadmin : chr "NS 500mls" "none" "NS 500mls" "NS 500mls" ...
## $ meds : chr "tramadol" "tramadol" "tramadol" "analgesics" ...
## $ Eddeparture : chr "1730hrs" "1200hrs" "1600hrs" "1110hrs" ...
## $ Edduration_mins : int 100 65 90 45 50 125 100 80 120 130 ...
## $ HRdispo : int 95 78 95 87 78 96 95 60 95 70 ...
## $ SBPdispo : int 93 112 105 127 130 110 105 100 133 120 ...
## $ DBPdispo : int 70 80 85 69 78 70 80 55 80 70 ...
## $ RRdispo : int 20 16 21 19 16 15 18 18 21 20 ...
## $ SPO2dispo : int 95 95 93 94 94 93 95 93 94 95 ...
## $ Admit : chr "icu" "ward" "ward" "ward" ...
## $ hospdischarge : chr "21/9/2022" "14/9/2022" "2/10/2022" "23/9/2022" ...
## $ LOSdays : int 30 29 30 15 17 22 26 34 29 30 ...
## $ state.1 : chr "stable" "stable" "stable" "stable" ...
## $ outcome : chr "alive" "alive" "alive" "alive" ...
## $ ICU_LOS : int 14 NA NA NA NA 10 NA NA NA NA ...
## $ ward_LOS : int 16 NA NA NA NA 12 NA NA NA NA ...
# View the first few rows of the data
head(survey_data)
## code county time group age gender mstatus edulevel occup
## 1 1 Migori endline control 30 male married secondary mason
## 2 2 Migori endline control 38 male married secondary tailor
## 3 3 Migori endline control 34 female married secondary business
## 4 4 Migori endline control 28 female single college business
## 5 5 Migori endline control 32 male married secondary driver
## 6 6 Migori endline control 41 male married college administrator
## dateinjury timeinj MOI anatlocation ISS admdate arrivaltime
## 1 20/8/2022 1500hrs fall head 16 20/8/2022 1530hrs
## 2 15/8/2022 1000hrs rti chest 15 15/8/2022 1040hrs
## 3 2/9/2022 1300hrs rti extremities, pelvis 15 2/9/2022 1400hrs
## 4 8/9/2022 0900hrs stab chest 15 8/9/2022 1000hrs
## 5 13/9/2022 2000hrs rti head 15 13/9/2022 2030hrs
## 6 17/9/2022 1000hrs fall head, face 15 17/9/2022 1100hrs
## arrivalmode refby receiveby triagecategory timeassmnt Durationmins state
## 1 car self rco urgent 1550hrs 20 alive
## 2 car self nurse urgent 1055hrs 15 alive
## 3 motorbike self nurse urgent 1430hrs 30 alive
## 4 taxi nurse mo immediate 1025hrs 25 alive
## 5 motorbike self nurse urgent 2300hrs 20 alive
## 6 motorbike self nurse immediate 1325hrs 25 alive
## HRinitial SBPinitial DBPinitial RRinitial SPO2initial painscore GCS AVPU
## 1 80 120 80 20 94 10 14 a
## 2 88 130 76 18 93 10 13 v
## 3 75 125 80 22 90 10 15 a
## 4 90 130 80 20 95 9 15 a
## 5 80 120 75 22 95 10 13 a
## 6 95 125 74 21 93 10 12 p
## airwaystatus airwayinterven spinestabilized breathingstatus breathinginterven
## 1 patent none no spont none
## 2 patent none no spont none
## 3 patent none no spont none
## 4 patent none no spont none
## 5 patent none no spont none
## 6 obstructed none no supplemental none
## bleedingcontrol Fluidadmin meds Eddeparture Edduration_mins HRdispo
## 1 IVF NS 500mls tramadol 1730hrs 100 95
## 2 bandage none tramadol 1200hrs 65 78
## 3 pressure NS 500mls tramadol 1600hrs 90 95
## 4 bandage NS 500mls analgesics 1110hrs 45 87
## 5 IVF NS 500mls tramadol 2350hrs 50 78
## 6 none mannitol tramadol 1530hrs 125 96
## SBPdispo DBPdispo RRdispo SPO2dispo Admit hospdischarge LOSdays state.1
## 1 93 70 20 95 icu 21/9/2022 30 stable
## 2 112 80 16 95 ward 14/9/2022 29 stable
## 3 105 85 21 93 ward 2/10/2022 30 stable
## 4 127 69 19 94 ward 23/9/2022 15 stable
## 5 130 78 16 94 ward 30/9/2022 17 stable
## 6 110 70 15 93 icu 9/10/2022 22 stable
## outcome ICU_LOS ward_LOS
## 1 alive 14 16
## 2 alive NA NA
## 3 alive NA NA
## 4 alive NA NA
## 5 alive NA NA
## 6 alive 10 12
# Load the dplyr library
library(dplyr)
# Convert date columns to Date format
survey_data <- survey_data %>%
mutate(dateinjury = as.Date(dateinjury),
admdate = as.Date(admdate),
hospdischarge = as.Date(hospdischarge))
#summary of demographics
library(dplyr)
# Read the survey data from the CSV file
survey_data1 <- read.csv("survey_data1.csv")
# Summarize variables by time and treat_control
summary_age <- survey_data1 %>%
group_by(time, treat_control) %>%
summarize(
mean_age = mean(age, na.rm = TRUE),
)
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_age
## # A tibble: 4 × 3
## # Groups: time [2]
## time treat_control mean_age
## <chr> <chr> <dbl>
## 1 baseline control 39.0
## 2 baseline treatment 40.0
## 3 endline control 41.9
## 4 endline treatment 39.3
summary_gender <- survey_data1 %>%
group_by(time, treat_control) %>%
summarize(
gender_count_male = sum(gender == "male", na.rm = TRUE),
gender_count_female = sum(gender == "female", na.rm = TRUE)
)
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_gender
## # A tibble: 4 × 4
## # Groups: time [2]
## time treat_control gender_count_male gender_count_female
## <chr> <chr> <int> <int>
## 1 baseline control 26 20
## 2 baseline treatment 33 24
## 3 endline control 37 27
## 4 endline treatment 33 20
summary_marital <- survey_data1 %>%
group_by(time, treat_control) %>%
summarize(
mstatus_single = sum(mstatus == "single", na.rm = TRUE),
mstatus_married = sum(mstatus == "married", na.rm = TRUE),
mstatus_divorced = sum(mstatus == "divorced", na.rm = TRUE)
)
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_marital
## # A tibble: 4 × 5
## # Groups: time [2]
## time treat_control mstatus_single mstatus_married mstatus_divorced
## <chr> <chr> <int> <int> <int>
## 1 baseline control 16 30 0
## 2 baseline treatment 17 39 0
## 3 endline control 11 53 0
## 4 endline treatment 15 37 0
summary_educ <- survey_data1 %>%
group_by(time, treat_control) %>%
summarize(
edulevel_primary = sum(edulevel == "primary", na.rm = TRUE),
edulevel_secondary = sum(edulevel == "secondary", na.rm = TRUE),
edulevel_tertiary = sum(edulevel == "tertiary", na.rm = TRUE)
)
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_educ
## # A tibble: 4 × 5
## # Groups: time [2]
## time treat_control edulevel_primary edulevel_secondary edulevel_tertiary
## <chr> <chr> <int> <int> <int>
## 1 baseline control 18 20 0
## 2 baseline treatment 25 19 0
## 3 endline control 16 34 0
## 4 endline treatment 10 31 0
summary_occup <- survey_data1 %>%
group_by(time, treat_control) %>%
summarize( total_occupations = n_distinct(occup),
most_common_occupations = toString(head(names(sort(table(occup), decreasing = TRUE)), 3))
)
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_occup
## # A tibble: 4 × 4
## # Groups: time [2]
## time treat_control total_occupations most_common_occupations
## <chr> <chr> <int> <chr>
## 1 baseline control 9 business, student, farmer
## 2 baseline treatment 9 business, student, farmer
## 3 endline control 24 business, student, housewife
## 4 endline treatment 21 business, student, farmer
#Summary of MOI and anatlocation
summary_MOI <- survey_data1 %>%
group_by(time, treat_control) %>%
summarize( total_MOI = n_distinct(MOI),
most_common_MOI = toString(head(names(sort(table(MOI), decreasing = TRUE)), 3))
)
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_MOI
## # A tibble: 4 × 4
## # Groups: time [2]
## time treat_control total_MOI most_common_MOI
## <chr> <chr> <int> <chr>
## 1 baseline control 11 rti, stab, assault
## 2 baseline treatment 5 rti, fall, stab
## 3 endline control 10 rti, fall, stab
## 4 endline treatment 9 rti, fall, stab
summary_anatlocation <- survey_data1 %>%
group_by(time, treat_control) %>%
summarize( total_anatlocation = n_distinct(anatlocation),
most_common_anatlocation = toString(head(names(sort(table(anatlocation), decreasing = TRUE)), 3))
)
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_anatlocation
## # A tibble: 4 × 4
## # Groups: time [2]
## time treat_control total_anatlocation most_common_anatlocation
## <chr> <chr> <int> <chr>
## 1 baseline control 15 head, chest, extremities, extremiti…
## 2 baseline treatment 19 head, extremities, chest
## 3 endline control 21 head, chest, extremities
## 4 endline treatment 21 head, chest, chest, extremities
#Impute missing values in the numeric variables
# Load necessary library
library(mice)
# Define the variables with missing values
vars_with_missing <- c("HRinitial", "SBPinitial", "DBPinitial", "RRinitial",
"SPO2initial", "painscore", "GCS", "Edduration_mins",
"HRdispo", "SBPdispo", "DBPdispo", "RRdispo",
"SPO2dispo", "LOSdays", "ICU_LOS", "ward_LOS")
# Perform multiple imputation
imputed_data <- mice(survey_data1[, vars_with_missing], method = "pmm", m = 5)
##
## iter imp variable
## 1 1 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 1 2 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 1 3 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 1 4 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 1 5 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 2 1 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 2 2 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 2 3 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 2 4 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 2 5 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 3 1 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 3 2 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 3 3 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 3 4 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 3 5 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 4 1 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 4 2 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 4 3 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 4 4 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 4 5 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 5 1 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 5 2 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 5 3 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 5 4 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
## 5 5 painscore HRdispo SBPdispo DBPdispo RRdispo SPO2dispo ICU_LOS ward_LOS
# Combine the imputed data with the original dataset
survey_data1_imputed <- complete(imputed_data)
# Show the imputed data
#survey_data1_imputed
# Replace missing variables in survey_data1 with imputed values
survey_data1[, vars_with_missing] <- survey_data1_imputed
colnames(survey_data1)
## [1] "code" "county" "time"
## [4] "treat_control" "age" "gender"
## [7] "mstatus" "edulevel" "occup"
## [10] "dateinjury" "timeinj" "MOI"
## [13] "anatlocation" "ISS" "admdate"
## [16] "arrivaltime" "arrivalmode" "refby"
## [19] "receiveby" "triagecategory" "timeassmnt"
## [22] "Durationmins" "state" "HRinitial"
## [25] "SBPinitial" "DBPinitial" "RRinitial"
## [28] "SPO2initial" "painscore" "GCS"
## [31] "AVPU" "airwaystatus" "airwayinterven"
## [34] "spinestabilized" "breathingstatus" "breathinginterven"
## [37] "bleedingcontrol" "Fluidadmin" "meds"
## [40] "Eddeparture" "Edduration_mins" "HRdispo"
## [43] "SBPdispo" "DBPdispo" "RRdispo"
## [46] "SPO2dispo" "Admit" "hospdischarge"
## [49] "LOSdays" "state.1" "outcome"
## [52] "ICU_LOS" "ward_LOS"
#Factors influencing clinical management of trauma patients presenting at EDs in Western Kenya
# Logistic Regression
survey_data1$binary_outcome <- ifelse(survey_data1$outcome == "alive", 1, 0)
logistic_model <- glm(binary_outcome ~ time + treat_control+ICU_LOS,
family = binomial, data = survey_data1, control = list(maxit = 100))
# Check summary
summary(logistic_model)
##
## Call:
## glm(formula = binary_outcome ~ time + treat_control + ICU_LOS,
## family = binomial, data = survey_data1, control = list(maxit = 100))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.120665 0.511338 -0.236 0.813448
## timeendline 1.232655 0.469830 2.624 0.008700 **
## treat_controltreatment 0.001501 0.457169 0.003 0.997381
## ICU_LOS 0.346736 0.094988 3.650 0.000262 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 159.85 on 219 degrees of freedom
## Residual deviance: 129.75 on 216 degrees of freedom
## AIC: 137.75
##
## Number of Fisher Scoring iterations: 6
# Linear Regression
linear_model <- lm(ICU_LOS ~ age + LOSdays + HRinitial + SBPinitial + DBPinitial + airwayinterven + bleedingcontrol + Edduration_mins + RRdispo, data = survey_data1)
summary(linear_model)
##
## Call:
## lm(formula = ICU_LOS ~ age + LOSdays + HRinitial + SBPinitial +
## DBPinitial + airwayinterven + bleedingcontrol + Edduration_mins +
## RRdispo, data = survey_data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7506 -1.6892 -0.0043 1.6530 5.6807
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.970627 2.196803 4.539 9.71e-06 ***
## age 0.006328 0.011637 0.544 0.587171
## LOSdays 0.208013 0.013271 15.674 < 2e-16 ***
## HRinitial -0.041978 0.011978 -3.505 0.000563 ***
## SBPinitial -0.063395 0.012235 -5.182 5.31e-07 ***
## DBPinitial -0.006157 0.019517 -0.315 0.752711
## airwayintervennone -0.875895 0.693054 -1.264 0.207752
## airwayintervenposition -0.384491 1.097914 -0.350 0.726552
## airwayintervensuction 0.066556 0.731688 0.091 0.927613
## airwayintervensuction, ETT -2.841872 2.373300 -1.197 0.232540
## bleedingcontrolbandage/ IVF 3.415223 2.250020 1.518 0.130612
## bleedingcontroldirect pressure 0.707037 1.351784 0.523 0.601520
## bleedingcontrolIVF 0.661785 0.690736 0.958 0.339164
## bleedingcontrolN/A 0.154561 0.813416 0.190 0.849488
## bleedingcontrolnone -0.179079 0.557471 -0.321 0.748365
## bleedingcontrolpressure 1.345522 0.483945 2.780 0.005944 **
## Edduration_mins 0.037809 0.004558 8.295 1.52e-14 ***
## RRdispo 0.068542 0.043906 1.561 0.120064
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.213 on 202 degrees of freedom
## Multiple R-squared: 0.7329, Adjusted R-squared: 0.7104
## F-statistic: 32.6 on 17 and 202 DF, p-value: < 2.2e-16
# Generalized Linear Model (GLM)
glm_model <- glm(LOSdays ~ ICU_LOS + HRinitial + SBPinitial + DBPinitial + airwayinterven + bleedingcontrol+ Edduration_mins + HRdispo + RRdispo, family = gaussian, data = survey_data1)
summary(glm_model)
##
## Call:
## glm(formula = LOSdays ~ ICU_LOS + HRinitial + SBPinitial + DBPinitial +
## airwayinterven + bleedingcontrol + Edduration_mins + HRdispo +
## RRdispo, family = gaussian, data = survey_data1)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15.91708 7.74542 -2.055 0.041163 *
## ICU_LOS 2.71214 0.17214 15.755 < 2e-16 ***
## HRinitial 0.07040 0.04375 1.609 0.109175
## SBPinitial 0.10094 0.04587 2.200 0.028911 *
## DBPinitial 0.15570 0.06714 2.319 0.021391 *
## airwayintervennone 8.28663 2.37594 3.488 0.000598 ***
## airwayintervenposition 4.22802 3.86277 1.095 0.275015
## airwayintervensuction 2.87607 2.57694 1.116 0.265713
## airwayintervensuction, ETT 1.82175 8.42041 0.216 0.828934
## bleedingcontrolbandage/ IVF -3.19016 7.99388 -0.399 0.690259
## bleedingcontroldirect pressure -3.61301 4.70359 -0.768 0.443302
## bleedingcontrolIVF -0.79977 2.47754 -0.323 0.747175
## bleedingcontrolN/A 3.88405 2.87721 1.350 0.178547
## bleedingcontrolnone 1.32318 1.96939 0.672 0.502431
## bleedingcontrolpressure -2.09114 1.77363 -1.179 0.239777
## Edduration_mins -0.08818 0.01832 -4.813 2.91e-06 ***
## HRdispo -0.06970 0.03986 -1.749 0.081860 .
## RRdispo -0.19259 0.15711 -1.226 0.221682
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 61.16212)
##
## Null deviance: 36581 on 219 degrees of freedom
## Residual deviance: 12355 on 202 degrees of freedom
## AIC: 1548.5
##
## Number of Fisher Scoring iterations: 2
#Factors influencing clinical management of trauma patients presenting at EDs in Western Kenya
# Load necessary libraries
library(ggplot2)
# Your analysis and visualization code here
# For example:
# Assess the relationship between triage duration and other variables
triage_duration_model1 <- lm(Durationmins ~ time + treat_control+HRinitial + SBPinitial + DBPinitial, data = survey_data1)
# View summary of the model
summary(triage_duration_model1)
##
## Call:
## lm(formula = Durationmins ~ time + treat_control + HRinitial +
## SBPinitial + DBPinitial, data = survey_data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.473 -7.358 -2.054 6.964 38.296
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.01198 7.32242 3.416 0.000761 ***
## timeendline -5.73511 1.42718 -4.018 8.11e-05 ***
## treat_controltreatment -6.65236 1.43794 -4.626 6.44e-06 ***
## HRinitial 0.02602 0.05085 0.512 0.609400
## SBPinitial 0.07341 0.04974 1.476 0.141424
## DBPinitial -0.09818 0.08190 -1.199 0.231935
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.48 on 214 degrees of freedom
## Multiple R-squared: 0.1529, Adjusted R-squared: 0.1331
## F-statistic: 7.727 on 5 and 214 DF, p-value: 1.06e-06
# Linear Regression
triage_duration_model2 <- lm(Durationmins ~ age + LOSdays + HRinitial + SBPinitial + DBPinitial + airwayinterven + bleedingcontrol + Edduration_mins + RRdispo, data = survey_data1)
summary(triage_duration_model2)
##
## Call:
## lm(formula = Durationmins ~ age + LOSdays + HRinitial + SBPinitial +
## DBPinitial + airwayinterven + bleedingcontrol + Edduration_mins +
## RRdispo, data = survey_data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.670 -7.211 -1.633 6.694 35.631
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.96099 10.69624 2.147 0.03301 *
## age 0.09939 0.05666 1.754 0.08094 .
## LOSdays 0.06385 0.06462 0.988 0.32427
## HRinitial -0.03894 0.05832 -0.668 0.50511
## SBPinitial 0.08835 0.05957 1.483 0.13961
## DBPinitial -0.11970 0.09503 -1.260 0.20925
## airwayintervennone -5.26179 3.37448 -1.559 0.12049
## airwayintervenposition -5.50784 5.34575 -1.030 0.30409
## airwayintervensuction -7.16109 3.56259 -2.010 0.04575 *
## airwayintervensuction, ETT -16.36221 11.55561 -1.416 0.15833
## bleedingcontrolbandage/ IVF 8.17158 10.95536 0.746 0.45660
## bleedingcontroldirect pressure -7.46137 6.58185 -1.134 0.25829
## bleedingcontrolIVF -7.08133 3.36320 -2.106 0.03648 *
## bleedingcontrolN/A 9.02988 3.96053 2.280 0.02365 *
## bleedingcontrolnone -1.70944 2.71433 -0.630 0.52955
## bleedingcontrolpressure 1.00944 2.35633 0.428 0.66882
## Edduration_mins 0.07827 0.02219 3.527 0.00052 ***
## RRdispo -0.15100 0.21378 -0.706 0.48079
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.77 on 202 degrees of freedom
## Multiple R-squared: 0.1549, Adjusted R-squared: 0.08379
## F-statistic: 2.178 on 17 and 202 DF, p-value: 0.005795
library(ggplot2)
# Visualize the relationship between triage duration and other variables for triage_duration_model1 using a boxplot
ggplot(survey_data1, aes(x = time, y = Durationmins)) +
geom_boxplot(fill = "skyblue", color = "darkblue") +
labs(title = "Relationship between Time and Triage Duration (Model 1)",
x = "Time",
y = "Triage Duration (minutes)")

# Visualize the relationship between triage duration and other variables for triage_duration_model2 with improved color and clarity
ggplot(survey_data1, aes(x = age, y = Durationmins)) +
geom_point(color = "darkred", alpha = 0.6) +
geom_smooth(method = "lm", color = "blue") +
labs(title = "Relationship between Age and Triage Duration (Model 2)",
x = "Age",
y = "Triage Duration (minutes)")
## `geom_smooth()` using formula = 'y ~ x'

# Visualize the relationship between triage duration and other variables for triage_duration_model3 with improved color and clarity
ggplot(survey_data1, aes(x = LOSdays, y = Durationmins)) +
geom_point(color = "darkgreen", alpha = 0.6) +
geom_smooth(method = "lm", color = "orange") +
labs(title = "Relationship between LOS Days and Triage Duration (Model 3)",
x = "LOS Days",
y = "Triage Duration (minutes)")
## `geom_smooth()` using formula = 'y ~ x'

# Visualize the relationship between triage duration and other variables for triage_duration_model1 using a boxplot
ggplot(survey_data1, aes(x = treat_control, y = Durationmins)) +
geom_boxplot(fill = "skyblue", color = "darkblue") +
labs(title = "Relationship between treat_control and Triage Duration (Model 4)",
x = "treat_control",
y = "Triage Duration (minutes)")

#Patient outcomes in trauma patients presenting at EDs in Western Kenya
#ICU LoS
# Filter data for treatment and control groups
treatment_icu_los <- na.omit(survey_data1$ICU_LOS[survey_data1$treat_control == "treatment"])
control_icu_los <- na.omit(survey_data1$ICU_LOS[survey_data1$treat_control == "control"])
# Combine the ICU LOS data and group labels into a data frame
icu_los_data <- data.frame(
ICU_LOS = c(treatment_icu_los, control_icu_los),
Group = factor(rep(c("Treatment", "Control"), c(length(treatment_icu_los), length(control_icu_los))), levels = c("Treatment", "Control"))
)
# Create a box plot to visualize the distribution of ICU LOS by group
ggplot(icu_los_data, aes(x = Group, y = ICU_LOS, fill = Group)) +
geom_boxplot() +
labs(title = "Comparison of ICU Length of Stay between Treatment and Control Groups",
x = "Group",
y = "ICU Length of Stay (Days)",
fill = "Group") +
theme_minimal() +
theme(legend.position = "none")

# Hospital LoS
# Filter data for treatment and control groups
treatment_hospital_los <- na.omit(survey_data1$LOSdays[survey_data1$treat_control == "treatment"])
control_hospital_los <- na.omit(survey_data1$LOSdays[survey_data1$treat_control == "control"])
# Combine the Hospital LOS data and group labels into a data frame
hospital_los_data <- data.frame(
LOSdays = c(treatment_hospital_los, control_hospital_los),
Group = factor(rep(c("Treatment", "Control"), c(length(treatment_hospital_los), length(control_hospital_los))), levels = c("Treatment", "Control"))
)
# Create a box plot to visualize the distribution of Hospital LOS by group
ggplot(hospital_los_data, aes(x = Group, y = LOSdays, fill = Group)) +
geom_boxplot() +
labs(title = "Comparison of Hospital Length of Stay between Treatment and Control Groups",
x = "Group",
y = "Hospital Length of Stay (Days)",
fill = "Group") +
theme_minimal() +
theme(legend.position = "none")

#To test the effect of MTCP at EDs in Western Kenya (association)
# Perform t-test for ICU LOS
t_test_icu_los <- t.test(survey_data1$ICU_LOS ~ survey_data1$treat_control)
# Print the result
print(t_test_icu_los)
##
## Welch Two Sample t-test
##
## data: survey_data1$ICU_LOS by survey_data1$treat_control
## t = 3.3028, df = 215.89, p-value = 0.00112
## alternative hypothesis: true difference in means between group control and group treatment is not equal to 0
## 95 percent confidence interval:
## 0.7221408 2.8596774
## sample estimates:
## mean in group control mean in group treatment
## 7.190909 5.400000
# Create an interaction term between treatment and time
survey_data1$treat_control_time <- interaction(survey_data1$treat_control, survey_data1$time)
# Fit DiD regression model
did_model <- lm(LOSdays ~ treat_control + time + treat_control_time, data = survey_data1)
# Print the summary
summary(did_model)
##
## Call:
## lm(formula = LOSdays ~ treat_control + time + treat_control_time,
## data = survey_data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.453 -7.239 0.030 6.453 41.298
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.239 1.888 11.778 <2e-16 ***
## treat_controltreatment -5.906 2.379 -2.483 0.0138 *
## timeendline 2.214 2.476 0.894 0.3721
## treat_control_timetreatment.baseline 7.369 3.479 2.118 0.0353 *
## treat_control_timecontrol.endline NA NA NA NA
## treat_control_timetreatment.endline NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.81 on 216 degrees of freedom
## Multiple R-squared: 0.03155, Adjusted R-squared: 0.0181
## F-statistic: 2.346 on 3 and 216 DF, p-value: 0.07382
# Fit DiD regression model
did_model <- lm(LOSdays ~ treat_control + time + treat_control_time, data = survey_data1)
# Extract coefficients from the model
coefficients <- coef(did_model)
# Extract standard errors of coefficients
standard_errors <- summary(did_model)$coefficients[, "Std. Error"]
# Calculate Cohen's d for the treatment effect
treatment_effect <- coefficients["treat_controltreatment"] # Coefficient for the treatment variable
standard_error_treatment <- standard_errors["treat_controltreatment"] # Standard error for the treatment coefficient
n <- nrow(survey_data1) # Sample size
cohen_d <- treatment_effect / standard_error_treatment
# Print Cohen's d
print(cohen_d)
## treat_controltreatment
## -2.483036
library(ggplot2)
# Visualize the change in outcome variables over time for the treatment and control groups
ggplot(survey_data1, aes(x = time, y = LOSdays, color = factor(treat_control))) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "Change in LOS Days over Time by Treatment Group",
x = "Time",
y = "LOS Days",
color = "Treatment Group") +
scale_color_manual(values = c("blue", "red")) # Customizing colors for treatment and control groups
## `geom_smooth()` using formula = 'y ~ x'

# Perform Difference-in-Differences (DiD) analysis
did_model <- lm(LOSdays ~ treat_control * time, data = survey_data1)
# Print the summary
summary(did_model)
##
## Call:
## lm(formula = LOSdays ~ treat_control * time, data = survey_data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.453 -7.239 0.030 6.453 41.298
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.239 1.888 11.778 <2e-16 ***
## treat_controltreatment 1.463 2.538 0.576 0.5651
## timeendline 2.214 2.476 0.894 0.3721
## treat_controltreatment:timeendline -7.369 3.479 -2.118 0.0353 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.81 on 216 degrees of freedom
## Multiple R-squared: 0.03155, Adjusted R-squared: 0.0181
## F-statistic: 2.346 on 3 and 216 DF, p-value: 0.07382
# Calculate effect sizes and perform inferential analysis
# Extract coefficients from the model
coefficients <- coef(did_model)
# Extract standard errors of coefficients
standard_errors <- summary(did_model)$coefficients[, "Std. Error"]
# Calculate Cohen's d for the treatment effect
treatment_effect <- coefficients["treat_controlTRUE:time1"] - coefficients["treat_controlTRUE:time0"]
# Coefficient for the treatment*time interaction
standard_error_treatment <- standard_errors["treat_controlTRUE:time1"] + standard_errors["treat_controlTRUE:time0"]
# Sum of standard errors for the treatment*time interaction
n <- nrow(survey_data1) # Sample size
cohen_d <- treatment_effect / standard_error_treatment
# Print Cohen's d
print(paste("Cohen's d for treatment effect:", round(cohen_d, 2)))
## [1] "Cohen's d for treatment effect: NA"
library(ggplot2)
library(dplyr)
# Visualize the change in LOS over time by treatment group
ggplot(survey_data1, aes(x = time, y = LOSdays, color = factor(treat_control))) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~treat_control, ncol = 1) + # Separate plots for treatment and control groups
labs(title = "Change in Length of Stay (LOS) Over Time by Treatment Group",
x = "Time",
y = "Length of Stay (LOS) Days",
color = "Treatment Group") +
scale_color_manual(values = c("blue", "red")) + # Customizing colors for treatment and control groups
theme_minimal() # Applying a minimal theme for better clarity
## `geom_smooth()` using formula = 'y ~ x'

# Perform Difference-in-Differences (DiD) analysis
did_model <- lm(LOSdays ~ treat_control * time, data = survey_data1)
# Print the summary
summary(did_model)
##
## Call:
## lm(formula = LOSdays ~ treat_control * time, data = survey_data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.453 -7.239 0.030 6.453 41.298
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.239 1.888 11.778 <2e-16 ***
## treat_controltreatment 1.463 2.538 0.576 0.5651
## timeendline 2.214 2.476 0.894 0.3721
## treat_controltreatment:timeendline -7.369 3.479 -2.118 0.0353 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.81 on 216 degrees of freedom
## Multiple R-squared: 0.03155, Adjusted R-squared: 0.0181
## F-statistic: 2.346 on 3 and 216 DF, p-value: 0.07382
# Calculate effect sizes and perform inferential analysis
# Extract coefficients from the model
coefficients <- coef(did_model)
# Extract standard errors of coefficients
standard_errors <- summary(did_model)$coefficients[, "Std. Error"]
# Calculate t-values and p-values for treatment*time interaction
t_values <- coefficients["treat_controlTRUE:time1"] / standard_errors["treat_controlTRUE:time1"]
p_values <- 2 * pt(abs(t_values), df = nrow(survey_data1) - length(coefficients))
# Print t-values and p-values
print("Treatment*time Interaction:")
## [1] "Treatment*time Interaction:"
print(paste("t-value:", round(t_values, 2)))
## [1] "t-value: NA"
print(paste("p-value:", round(p_values, 4)))
## [1] "p-value: NA"
# Check assumptions of DiD model
# Residuals vs Fitted plot
plot(residuals(did_model), fitted(did_model),
xlab = "Fitted values", ylab = "Residuals",
main = "Residuals vs Fitted Plot for DiD Model")
abline(h = 0, col = "red")

# Normal Q-Q plot
qqnorm(residuals(did_model))
qqline(residuals(did_model))

# Shapiro-Wilk test for normality of residuals
shapiro.test(residuals(did_model))
##
## Shapiro-Wilk normality test
##
## data: residuals(did_model)
## W = 0.94552, p-value = 2.375e-07
# Refinement of the DiD Model
did_model_refined <- lm(LOSdays ~ treat_control * time + ICU_LOS + HRinitial + SBPinitial + DBPinitial + airwayinterven + bleedingcontrol + Edduration_mins + HRdispo + RRdispo, data = survey_data1)
# Check model summary
summary(did_model_refined)
##
## Call:
## lm(formula = LOSdays ~ treat_control * time + ICU_LOS + HRinitial +
## SBPinitial + DBPinitial + airwayinterven + bleedingcontrol +
## Edduration_mins + HRdispo + RRdispo, data = survey_data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.4324 -4.9947 -0.0152 4.5810 22.7349
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.91500 7.81601 -2.292 0.022947 *
## treat_controltreatment 2.57852 1.59314 1.619 0.107135
## timeendline 0.72714 1.52054 0.478 0.633025
## ICU_LOS 2.75372 0.17637 15.613 < 2e-16 ***
## HRinitial 0.06671 0.04373 1.526 0.128713
## SBPinitial 0.10619 0.04609 2.304 0.022268 *
## DBPinitial 0.15292 0.06710 2.279 0.023736 *
## airwayintervennone 8.29011 2.38174 3.481 0.000615 ***
## airwayintervenposition 3.89525 3.86928 1.007 0.315296
## airwayintervensuction 2.73867 2.58292 1.060 0.290293
## airwayintervensuction, ETT 0.90494 8.43627 0.107 0.914684
## bleedingcontrolbandage/ IVF -4.29115 8.01787 -0.535 0.593111
## bleedingcontroldirect pressure -3.14696 4.70375 -0.669 0.504251
## bleedingcontrolIVF -0.50744 2.48992 -0.204 0.838719
## bleedingcontrolN/A 3.89298 2.87174 1.356 0.176758
## bleedingcontrolnone 1.13561 1.96699 0.577 0.564367
## bleedingcontrolpressure -2.17828 1.77680 -1.226 0.221663
## Edduration_mins -0.08305 0.01854 -4.479 1.26e-05 ***
## HRdispo -0.07037 0.03978 -1.769 0.078399 .
## RRdispo -0.19056 0.15729 -1.211 0.227147
## treat_controltreatment:timeendline -0.61169 2.20302 -0.278 0.781561
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.8 on 199 degrees of freedom
## Multiple R-squared: 0.669, Adjusted R-squared: 0.6357
## F-statistic: 20.11 on 20 and 199 DF, p-value: < 2.2e-16
# Additional Tests - Bootstrap
library(boot)
##
## Attaching package: 'boot'
## The following object is masked from 'package:lattice':
##
## melanoma
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:survival':
##
## aml
# Define function to estimate treatment effect in DiD model
did_estimate <- function(data, indices) {
subset <- data[indices, ]
model <- lm(LOSdays ~ treat_control * time, data = subset)
treatment_effect <- coef(model)["treat_controltreatment:timeendline"]
return(treatment_effect)
}
# Perform bootstrap
boot_results <- boot(data = survey_data1, statistic = did_estimate, R = 1000)
# Get bootstrap confidence intervals
boot_ci <- boot.ci(boot_results, type = "bca")
# Print bootstrap confidence intervals
boot_ci
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_results, type = "bca")
##
## Intervals :
## Level BCa
## 95% (-13.916, -0.392 )
## Calculations and Intervals on Original Scale
# Sensitivity Analysis (e.g., controlling for additional covariates, subgroup analysis)
# Visualizations
# Treatment effect over time
# Calculate mean LOS and confidence intervals for each group at each time point
library(dplyr)
library(tidyr)
mean_ci <- survey_data1 %>%
group_by(time, treat_control) %>%
summarise(mean_LOS = mean(LOSdays),
lower_ci = mean(LOSdays) - qt(0.975, length(LOSdays) - 1) * (sd(LOSdays) / sqrt(length(LOSdays))),
upper_ci = mean(LOSdays) + qt(0.975, length(LOSdays) - 1) * (sd(LOSdays) / sqrt(length(LOSdays)))) %>%
ungroup()
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Plot treatment effect over time with error bars using bar plot
ggplot(mean_ci, aes(x = time, y = mean_LOS, fill = treat_control)) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0.2, position = position_dodge(width = 0.9)) +
labs(title = "Treatment Effect Over Time",
x = "Time",
y = "Mean Length of Stay",
fill = "Group") +
theme_minimal()

# Balance plot for covariates
covariate_balance_plot <- function(data, covariate) {
ggplot(data, aes(x = treat_control, y = !!sym(covariate), fill = treat_control)) +
geom_boxplot() +
labs(title = paste("Balance Plot for", covariate),
x = "Group",
y = covariate,
fill = "Group") +
theme_minimal()
}
covariate_balance_plot(survey_data1, "ICU_LOS")

# Distribution of outcome variable
ggplot(survey_data1, aes(x = LOSdays, fill = treat_control)) +
geom_histogram(binwidth = 1, position = "identity", alpha = 0.5) +
labs(title = "Distribution of Length of Stay",
x = "Length of Stay",
y = "Frequency",
fill = "Group") +
theme_minimal()

options(repos = c(CRAN = "https://cran.r-project.org"))