# Load necessary libraries
library(survey)      # For handling complex survey design
## Warning: package 'survey' was built under R version 4.3.3
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(mice)        # For multiple imputation of missing data
## Warning: package 'mice' was built under R version 4.3.3
## 
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
## 
##     filter
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(dplyr)       # For data manipulation
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)     # For data visualization
## Warning: package 'ggplot2' was built under R version 4.3.3
library(psych)       # For reliability analysis
## Warning: package 'psych' was built under R version 4.3.3
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(caret)       # For resampling and imbalanced data handling
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 4.3.3
## 
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
## 
##     cluster
library(knitr)       # For dynamic report generation
## Warning: package 'knitr' was built under R version 4.3.3
library(lattice)     # For advanced data visualization
library(tidyverse)   # For data manipulation and visualization
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%()    masks ggplot2::%+%()
## ✖ psych::alpha()  masks ggplot2::alpha()
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks mice::filter(), stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ purrr::lift()   masks caret::lift()
## ✖ tidyr::pack()   masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(likert)      # For Likert scale analysis
## Warning: package 'likert' was built under R version 4.3.3
## Loading required package: xtable
## Warning: package 'xtable' was built under R version 4.3.3
## 
## Attaching package: 'likert'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
library(MASS)        # For advanced statistical analysis
## Warning: package 'MASS' was built under R version 4.3.3
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(viridis)     # For color-blind friendly visualization
## Warning: package 'viridis' was built under R version 4.3.3
## Loading required package: viridisLite
## Warning: package 'viridisLite' was built under R version 4.3.3
library(here)        # For managing file paths
## Warning: package 'here' was built under R version 4.3.3
## here() starts at C:/nick/practice
library(flextable)   # For creating flexible tables
## Warning: package 'flextable' was built under R version 4.3.3
## 
## Attaching package: 'flextable'
## 
## The following object is masked from 'package:xtable':
## 
##     align
## 
## The following object is masked from 'package:purrr':
## 
##     compose
library(devtools)    # For installing and managing packages from GitHub
## Warning: package 'devtools' was built under R version 4.3.3
## Loading required package: usethis
## Warning: package 'usethis' was built under R version 4.3.3
library(GPArotation) # For factor rotation
## Warning: package 'GPArotation' was built under R version 4.3.3
## 
## Attaching package: 'GPArotation'
## 
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin
library(ufs)         # For user-friendly science
# Import the CSV file into R
survey_data <- read.csv("C:/nick/practice/traumadata/traumaData.csv")

# View the structure of the imported data
str(survey_data)
## 'data.frame':    256 obs. of  53 variables:
##  $ code             : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ county           : chr  "Migori" "Migori" "Migori" "Migori" ...
##  $ time             : chr  "endline" "endline" "endline" "endline" ...
##  $ group            : chr  "control" "control" "control" "control" ...
##  $ age              : int  30 38 34 28 32 41 22 29 33 45 ...
##  $ gender           : chr  "male" "male" "female" "female" ...
##  $ mstatus          : chr  "married" "married" "married" "single" ...
##  $ edulevel         : chr  "secondary" "secondary" "secondary" "college" ...
##  $ occup            : chr  "mason" "tailor" "business" "business" ...
##  $ dateinjury       : chr  "20/8/2022" "15/8/2022" "2/9/2022" "8/9/2022" ...
##  $ timeinj          : chr  "1500hrs" "1000hrs" "1300hrs" "0900hrs" ...
##  $ MOI              : chr  "fall" "rti" "rti" "stab" ...
##  $ anatlocation     : chr  "head" "chest" "extremities, pelvis" "chest" ...
##  $ ISS              : int  16 15 15 15 15 15 15 16 15 16 ...
##  $ admdate          : chr  "20/8/2022" "15/8/2022" "2/9/2022" "8/9/2022" ...
##  $ arrivaltime      : chr  "1530hrs" "1040hrs" "1400hrs" "1000hrs" ...
##  $ arrivalmode      : chr  "car" "car" "motorbike" "taxi" ...
##  $ refby            : chr  "self" "self" "self" "nurse" ...
##  $ receiveby        : chr  "rco" "nurse" "nurse" "mo" ...
##  $ triagecategory   : chr  "urgent" "urgent" "urgent" "immediate" ...
##  $ timeassmnt       : chr  "1550hrs" "1055hrs" "1430hrs" "1025hrs" ...
##  $ Durationmins     : int  20 15 30 25 20 25 30 30 30 40 ...
##  $ state            : chr  "alive" "alive" "alive" "alive" ...
##  $ HRinitial        : int  80 88 75 90 80 95 90 90 86 105 ...
##  $ SBPinitial       : int  120 130 125 130 120 125 115 110 130 110 ...
##  $ DBPinitial       : int  80 76 80 80 75 74 78 80 60 70 ...
##  $ RRinitial        : int  20 18 22 20 22 21 18 18 22 20 ...
##  $ SPO2initial      : int  94 93 90 95 95 93 94 94 90 95 ...
##  $ painscore        : int  10 10 10 9 10 10 8 9 10 10 ...
##  $ GCS              : int  14 13 15 15 13 12 15 13 14 15 ...
##  $ AVPU             : chr  "a" "v" "a" "a" ...
##  $ airwaystatus     : chr  "patent" "patent" "patent" "patent" ...
##  $ airwayinterven   : chr  "none" "none" "none" "none" ...
##  $ spinestabilized  : chr  "no" "no" "no" "no" ...
##  $ breathingstatus  : chr  "spont" "spont" "spont" "spont" ...
##  $ breathinginterven: chr  "none" "none" "none" "none" ...
##  $ bleedingcontrol  : chr  "IVF" "bandage" "pressure" "bandage" ...
##  $ Fluidadmin       : chr  "NS 500mls" "none" "NS 500mls" "NS 500mls" ...
##  $ meds             : chr  "tramadol" "tramadol" "tramadol" "analgesics" ...
##  $ Eddeparture      : chr  "1730hrs" "1200hrs" "1600hrs" "1110hrs" ...
##  $ Edduration_mins  : int  100 65 90 45 50 125 100 80 120 130 ...
##  $ HRdispo          : int  95 78 95 87 78 96 95 60 95 70 ...
##  $ SBPdispo         : int  93 112 105 127 130 110 105 100 133 120 ...
##  $ DBPdispo         : int  70 80 85 69 78 70 80 55 80 70 ...
##  $ RRdispo          : int  20 16 21 19 16 15 18 18 21 20 ...
##  $ SPO2dispo        : int  95 95 93 94 94 93 95 93 94 95 ...
##  $ Admit            : chr  "icu" "ward" "ward" "ward" ...
##  $ hospdischarge    : chr  "21/9/2022" "14/9/2022" "2/10/2022" "23/9/2022" ...
##  $ LOSdays          : int  30 29 30 15 17 22 26 34 29 30 ...
##  $ state.1          : chr  "stable" "stable" "stable" "stable" ...
##  $ outcome          : chr  "alive" "alive" "alive" "alive" ...
##  $ ICU_LOS          : int  14 NA NA NA NA 10 NA NA NA NA ...
##  $ ward_LOS         : int  16 NA NA NA NA 12 NA NA NA NA ...
# View the first few rows of the data
head(survey_data)
##   code county    time   group age gender mstatus  edulevel         occup
## 1    1 Migori endline control  30   male married secondary         mason
## 2    2 Migori endline control  38   male married secondary        tailor
## 3    3 Migori endline control  34 female married secondary      business
## 4    4 Migori endline control  28 female  single   college      business
## 5    5 Migori endline control  32   male married secondary        driver
## 6    6 Migori endline control  41   male married   college administrator
##   dateinjury timeinj  MOI        anatlocation ISS   admdate arrivaltime
## 1  20/8/2022 1500hrs fall                head  16 20/8/2022     1530hrs
## 2  15/8/2022 1000hrs  rti               chest  15 15/8/2022     1040hrs
## 3   2/9/2022 1300hrs  rti extremities, pelvis  15  2/9/2022     1400hrs
## 4   8/9/2022 0900hrs stab               chest  15  8/9/2022     1000hrs
## 5  13/9/2022 2000hrs  rti                head  15 13/9/2022     2030hrs
## 6  17/9/2022 1000hrs fall          head, face  15 17/9/2022     1100hrs
##   arrivalmode refby receiveby triagecategory timeassmnt Durationmins state
## 1         car  self       rco         urgent    1550hrs           20 alive
## 2         car  self     nurse         urgent    1055hrs           15 alive
## 3   motorbike  self     nurse         urgent    1430hrs           30 alive
## 4        taxi nurse        mo      immediate    1025hrs           25 alive
## 5   motorbike  self     nurse         urgent    2300hrs           20 alive
## 6   motorbike  self     nurse      immediate    1325hrs           25 alive
##   HRinitial SBPinitial DBPinitial RRinitial SPO2initial painscore GCS AVPU
## 1        80        120         80        20          94        10  14    a
## 2        88        130         76        18          93        10  13    v
## 3        75        125         80        22          90        10  15    a
## 4        90        130         80        20          95         9  15    a
## 5        80        120         75        22          95        10  13    a
## 6        95        125         74        21          93        10  12    p
##   airwaystatus airwayinterven spinestabilized breathingstatus breathinginterven
## 1       patent           none              no           spont              none
## 2       patent           none              no           spont              none
## 3       patent           none              no           spont              none
## 4       patent           none              no           spont              none
## 5       patent           none              no           spont              none
## 6   obstructed           none              no    supplemental              none
##   bleedingcontrol Fluidadmin       meds Eddeparture Edduration_mins HRdispo
## 1             IVF  NS 500mls   tramadol     1730hrs             100      95
## 2         bandage       none   tramadol     1200hrs              65      78
## 3        pressure  NS 500mls   tramadol     1600hrs              90      95
## 4         bandage  NS 500mls analgesics     1110hrs              45      87
## 5             IVF  NS 500mls   tramadol     2350hrs              50      78
## 6            none   mannitol   tramadol     1530hrs             125      96
##   SBPdispo DBPdispo RRdispo SPO2dispo Admit hospdischarge LOSdays state.1
## 1       93       70      20        95   icu     21/9/2022      30  stable
## 2      112       80      16        95  ward     14/9/2022      29  stable
## 3      105       85      21        93  ward     2/10/2022      30  stable
## 4      127       69      19        94  ward     23/9/2022      15  stable
## 5      130       78      16        94  ward     30/9/2022      17  stable
## 6      110       70      15        93   icu     9/10/2022      22  stable
##   outcome ICU_LOS ward_LOS
## 1   alive      14       16
## 2   alive      NA       NA
## 3   alive      NA       NA
## 4   alive      NA       NA
## 5   alive      NA       NA
## 6   alive      10       12
# Load the dplyr library
library(dplyr)

# Convert date columns to Date format
survey_data <- survey_data %>%
  mutate(dateinjury = as.Date(dateinjury),
         admdate = as.Date(admdate),
         hospdischarge = as.Date(hospdischarge))
#summary of demographics
library(dplyr)

# Read the survey data from the CSV file
survey_data1 <- read.csv("survey_data1.csv")

# Summarize variables by time and treat_control
summary_age <- survey_data1 %>%
  group_by(time, treat_control) %>%
  summarize(
    mean_age = mean(age, na.rm = TRUE),
  )
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_age
## # A tibble: 4 × 3
## # Groups:   time [2]
##   time     treat_control mean_age
##   <chr>    <chr>            <dbl>
## 1 baseline control           39.0
## 2 baseline treatment         40.0
## 3 endline  control           41.9
## 4 endline  treatment         39.3
summary_gender <- survey_data1 %>%
  group_by(time, treat_control) %>%
  summarize(
    gender_count_male = sum(gender == "male", na.rm = TRUE),
    gender_count_female = sum(gender == "female", na.rm = TRUE)
  )
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_gender
## # A tibble: 4 × 4
## # Groups:   time [2]
##   time     treat_control gender_count_male gender_count_female
##   <chr>    <chr>                     <int>               <int>
## 1 baseline control                      26                  20
## 2 baseline treatment                    33                  24
## 3 endline  control                      37                  27
## 4 endline  treatment                    33                  20
summary_marital <- survey_data1 %>%
  group_by(time, treat_control) %>%
  summarize(
    mstatus_single = sum(mstatus == "single", na.rm = TRUE),
    mstatus_married = sum(mstatus == "married", na.rm = TRUE),
    mstatus_divorced = sum(mstatus == "divorced", na.rm = TRUE)
      )
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_marital
## # A tibble: 4 × 5
## # Groups:   time [2]
##   time     treat_control mstatus_single mstatus_married mstatus_divorced
##   <chr>    <chr>                  <int>           <int>            <int>
## 1 baseline control                   16              30                0
## 2 baseline treatment                 17              39                0
## 3 endline  control                   11              53                0
## 4 endline  treatment                 15              37                0
summary_educ <- survey_data1 %>%
  group_by(time, treat_control) %>%
  summarize(
    edulevel_primary = sum(edulevel == "primary", na.rm = TRUE),
    edulevel_secondary = sum(edulevel == "secondary", na.rm = TRUE),
    edulevel_tertiary = sum(edulevel == "tertiary", na.rm = TRUE)
  )
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_educ
## # A tibble: 4 × 5
## # Groups:   time [2]
##   time     treat_control edulevel_primary edulevel_secondary edulevel_tertiary
##   <chr>    <chr>                    <int>              <int>             <int>
## 1 baseline control                     18                 20                 0
## 2 baseline treatment                   25                 19                 0
## 3 endline  control                     16                 34                 0
## 4 endline  treatment                   10                 31                 0
summary_occup <- survey_data1 %>%
  group_by(time, treat_control) %>%
  summarize(  total_occupations = n_distinct(occup),
    most_common_occupations = toString(head(names(sort(table(occup), decreasing = TRUE)), 3))
  )
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_occup
## # A tibble: 4 × 4
## # Groups:   time [2]
##   time     treat_control total_occupations most_common_occupations     
##   <chr>    <chr>                     <int> <chr>                       
## 1 baseline control                       9 business, student, farmer   
## 2 baseline treatment                     9 business, student, farmer   
## 3 endline  control                      24 business, student, housewife
## 4 endline  treatment                    21 business, student, farmer
#Summary of MOI and anatlocation
summary_MOI <- survey_data1 %>%
  group_by(time, treat_control) %>%
  summarize(  total_MOI = n_distinct(MOI),
    most_common_MOI = toString(head(names(sort(table(MOI), decreasing = TRUE)), 3))
  )
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_MOI
## # A tibble: 4 × 4
## # Groups:   time [2]
##   time     treat_control total_MOI most_common_MOI   
##   <chr>    <chr>             <int> <chr>             
## 1 baseline control              11 rti, stab, assault
## 2 baseline treatment             5 rti, fall, stab   
## 3 endline  control              10 rti, fall, stab   
## 4 endline  treatment             9 rti, fall, stab
summary_anatlocation <- survey_data1 %>%
  group_by(time, treat_control) %>%
  summarize(  total_anatlocation = n_distinct(anatlocation),
    most_common_anatlocation = toString(head(names(sort(table(anatlocation), decreasing = TRUE)), 3))
  )
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_anatlocation
## # A tibble: 4 × 4
## # Groups:   time [2]
##   time     treat_control total_anatlocation most_common_anatlocation            
##   <chr>    <chr>                      <int> <chr>                               
## 1 baseline control                       15 head, chest, extremities, extremiti…
## 2 baseline treatment                     19 head, extremities, chest            
## 3 endline  control                       21 head, chest, extremities            
## 4 endline  treatment                     21 head, chest, chest, extremities
#Impute missing values in the numeric variables
# Load necessary library
library(mice)

# Define the variables with missing values
vars_with_missing <- c("HRinitial", "SBPinitial", "DBPinitial", "RRinitial", 
                       "SPO2initial", "painscore", "GCS", "Edduration_mins",
                       "HRdispo", "SBPdispo", "DBPdispo", "RRdispo", 
                       "SPO2dispo", "LOSdays", "ICU_LOS", "ward_LOS")

# Perform multiple imputation
imputed_data <- mice(survey_data1[, vars_with_missing], method = "pmm", m = 5)
## 
##  iter imp variable
##   1   1  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   1   2  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   1   3  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   1   4  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   1   5  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   2   1  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   2   2  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   2   3  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   2   4  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   2   5  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   3   1  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   3   2  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   3   3  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   3   4  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   3   5  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   4   1  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   4   2  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   4   3  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   4   4  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   4   5  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   5   1  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   5   2  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   5   3  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   5   4  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   5   5  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
# Combine the imputed data with the original dataset
survey_data1_imputed <- complete(imputed_data)

# Show the imputed data
#survey_data1_imputed
# Replace missing variables in survey_data1 with imputed values
survey_data1[, vars_with_missing] <- survey_data1_imputed
colnames(survey_data1)
##  [1] "code"              "county"            "time"             
##  [4] "treat_control"     "age"               "gender"           
##  [7] "mstatus"           "edulevel"          "occup"            
## [10] "dateinjury"        "timeinj"           "MOI"              
## [13] "anatlocation"      "ISS"               "admdate"          
## [16] "arrivaltime"       "arrivalmode"       "refby"            
## [19] "receiveby"         "triagecategory"    "timeassmnt"       
## [22] "Durationmins"      "state"             "HRinitial"        
## [25] "SBPinitial"        "DBPinitial"        "RRinitial"        
## [28] "SPO2initial"       "painscore"         "GCS"              
## [31] "AVPU"              "airwaystatus"      "airwayinterven"   
## [34] "spinestabilized"   "breathingstatus"   "breathinginterven"
## [37] "bleedingcontrol"   "Fluidadmin"        "meds"             
## [40] "Eddeparture"       "Edduration_mins"   "HRdispo"          
## [43] "SBPdispo"          "DBPdispo"          "RRdispo"          
## [46] "SPO2dispo"         "Admit"             "hospdischarge"    
## [49] "LOSdays"           "state.1"           "outcome"          
## [52] "ICU_LOS"           "ward_LOS"
#Factors influencing clinical management of trauma patients presenting at EDs in Western Kenya

# Logistic Regression
survey_data1$binary_outcome <- ifelse(survey_data1$outcome == "alive", 1, 0)
logistic_model <- glm(binary_outcome ~ time + treat_control+ICU_LOS, 
                      family = binomial, data = survey_data1, control = list(maxit = 100))

# Check summary
summary(logistic_model)
## 
## Call:
## glm(formula = binary_outcome ~ time + treat_control + ICU_LOS, 
##     family = binomial, data = survey_data1, control = list(maxit = 100))
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -0.120665   0.511338  -0.236 0.813448    
## timeendline             1.232655   0.469830   2.624 0.008700 ** 
## treat_controltreatment  0.001501   0.457169   0.003 0.997381    
## ICU_LOS                 0.346736   0.094988   3.650 0.000262 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 159.85  on 219  degrees of freedom
## Residual deviance: 129.75  on 216  degrees of freedom
## AIC: 137.75
## 
## Number of Fisher Scoring iterations: 6
# Linear Regression
linear_model <- lm(ICU_LOS ~ age + LOSdays + HRinitial + SBPinitial + DBPinitial  + airwayinterven + bleedingcontrol + Edduration_mins + RRdispo, data = survey_data1)

summary(linear_model)
## 
## Call:
## lm(formula = ICU_LOS ~ age + LOSdays + HRinitial + SBPinitial + 
##     DBPinitial + airwayinterven + bleedingcontrol + Edduration_mins + 
##     RRdispo, data = survey_data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7506 -1.6892 -0.0043  1.6530  5.6807 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     9.970627   2.196803   4.539 9.71e-06 ***
## age                             0.006328   0.011637   0.544 0.587171    
## LOSdays                         0.208013   0.013271  15.674  < 2e-16 ***
## HRinitial                      -0.041978   0.011978  -3.505 0.000563 ***
## SBPinitial                     -0.063395   0.012235  -5.182 5.31e-07 ***
## DBPinitial                     -0.006157   0.019517  -0.315 0.752711    
## airwayintervennone             -0.875895   0.693054  -1.264 0.207752    
## airwayintervenposition         -0.384491   1.097914  -0.350 0.726552    
## airwayintervensuction           0.066556   0.731688   0.091 0.927613    
## airwayintervensuction, ETT     -2.841872   2.373300  -1.197 0.232540    
## bleedingcontrolbandage/ IVF     3.415223   2.250020   1.518 0.130612    
## bleedingcontroldirect pressure  0.707037   1.351784   0.523 0.601520    
## bleedingcontrolIVF              0.661785   0.690736   0.958 0.339164    
## bleedingcontrolN/A              0.154561   0.813416   0.190 0.849488    
## bleedingcontrolnone            -0.179079   0.557471  -0.321 0.748365    
## bleedingcontrolpressure         1.345522   0.483945   2.780 0.005944 ** 
## Edduration_mins                 0.037809   0.004558   8.295 1.52e-14 ***
## RRdispo                         0.068542   0.043906   1.561 0.120064    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.213 on 202 degrees of freedom
## Multiple R-squared:  0.7329, Adjusted R-squared:  0.7104 
## F-statistic:  32.6 on 17 and 202 DF,  p-value: < 2.2e-16
# Generalized Linear Model (GLM)
glm_model <- glm(LOSdays ~ ICU_LOS + HRinitial + SBPinitial + DBPinitial   + airwayinterven + bleedingcontrol+ Edduration_mins + HRdispo + RRdispo, family = gaussian, data = survey_data1)
summary(glm_model)
## 
## Call:
## glm(formula = LOSdays ~ ICU_LOS + HRinitial + SBPinitial + DBPinitial + 
##     airwayinterven + bleedingcontrol + Edduration_mins + HRdispo + 
##     RRdispo, family = gaussian, data = survey_data1)
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -15.91708    7.74542  -2.055 0.041163 *  
## ICU_LOS                          2.71214    0.17214  15.755  < 2e-16 ***
## HRinitial                        0.07040    0.04375   1.609 0.109175    
## SBPinitial                       0.10094    0.04587   2.200 0.028911 *  
## DBPinitial                       0.15570    0.06714   2.319 0.021391 *  
## airwayintervennone               8.28663    2.37594   3.488 0.000598 ***
## airwayintervenposition           4.22802    3.86277   1.095 0.275015    
## airwayintervensuction            2.87607    2.57694   1.116 0.265713    
## airwayintervensuction, ETT       1.82175    8.42041   0.216 0.828934    
## bleedingcontrolbandage/ IVF     -3.19016    7.99388  -0.399 0.690259    
## bleedingcontroldirect pressure  -3.61301    4.70359  -0.768 0.443302    
## bleedingcontrolIVF              -0.79977    2.47754  -0.323 0.747175    
## bleedingcontrolN/A               3.88405    2.87721   1.350 0.178547    
## bleedingcontrolnone              1.32318    1.96939   0.672 0.502431    
## bleedingcontrolpressure         -2.09114    1.77363  -1.179 0.239777    
## Edduration_mins                 -0.08818    0.01832  -4.813 2.91e-06 ***
## HRdispo                         -0.06970    0.03986  -1.749 0.081860 .  
## RRdispo                         -0.19259    0.15711  -1.226 0.221682    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 61.16212)
## 
##     Null deviance: 36581  on 219  degrees of freedom
## Residual deviance: 12355  on 202  degrees of freedom
## AIC: 1548.5
## 
## Number of Fisher Scoring iterations: 2
#Factors influencing clinical management of trauma patients presenting at EDs in Western Kenya
# Load necessary libraries
library(ggplot2)
# Your analysis and visualization code here
# For example:
# Assess the relationship between triage duration and other variables
triage_duration_model1 <- lm(Durationmins ~ time + treat_control+HRinitial + SBPinitial + DBPinitial, data = survey_data1)

# View summary of the model
summary(triage_duration_model1)
## 
## Call:
## lm(formula = Durationmins ~ time + treat_control + HRinitial + 
##     SBPinitial + DBPinitial, data = survey_data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.473  -7.358  -2.054   6.964  38.296 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            25.01198    7.32242   3.416 0.000761 ***
## timeendline            -5.73511    1.42718  -4.018 8.11e-05 ***
## treat_controltreatment -6.65236    1.43794  -4.626 6.44e-06 ***
## HRinitial               0.02602    0.05085   0.512 0.609400    
## SBPinitial              0.07341    0.04974   1.476 0.141424    
## DBPinitial             -0.09818    0.08190  -1.199 0.231935    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.48 on 214 degrees of freedom
## Multiple R-squared:  0.1529, Adjusted R-squared:  0.1331 
## F-statistic: 7.727 on 5 and 214 DF,  p-value: 1.06e-06
# Linear Regression
triage_duration_model2 <-  lm(Durationmins  ~ age + LOSdays + HRinitial + SBPinitial + DBPinitial  + airwayinterven + bleedingcontrol + Edduration_mins + RRdispo, data = survey_data1)

summary(triage_duration_model2)
## 
## Call:
## lm(formula = Durationmins ~ age + LOSdays + HRinitial + SBPinitial + 
##     DBPinitial + airwayinterven + bleedingcontrol + Edduration_mins + 
##     RRdispo, data = survey_data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.670  -7.211  -1.633   6.694  35.631 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     22.96099   10.69624   2.147  0.03301 *  
## age                              0.09939    0.05666   1.754  0.08094 .  
## LOSdays                          0.06385    0.06462   0.988  0.32427    
## HRinitial                       -0.03894    0.05832  -0.668  0.50511    
## SBPinitial                       0.08835    0.05957   1.483  0.13961    
## DBPinitial                      -0.11970    0.09503  -1.260  0.20925    
## airwayintervennone              -5.26179    3.37448  -1.559  0.12049    
## airwayintervenposition          -5.50784    5.34575  -1.030  0.30409    
## airwayintervensuction           -7.16109    3.56259  -2.010  0.04575 *  
## airwayintervensuction, ETT     -16.36221   11.55561  -1.416  0.15833    
## bleedingcontrolbandage/ IVF      8.17158   10.95536   0.746  0.45660    
## bleedingcontroldirect pressure  -7.46137    6.58185  -1.134  0.25829    
## bleedingcontrolIVF              -7.08133    3.36320  -2.106  0.03648 *  
## bleedingcontrolN/A               9.02988    3.96053   2.280  0.02365 *  
## bleedingcontrolnone             -1.70944    2.71433  -0.630  0.52955    
## bleedingcontrolpressure          1.00944    2.35633   0.428  0.66882    
## Edduration_mins                  0.07827    0.02219   3.527  0.00052 ***
## RRdispo                         -0.15100    0.21378  -0.706  0.48079    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.77 on 202 degrees of freedom
## Multiple R-squared:  0.1549, Adjusted R-squared:  0.08379 
## F-statistic: 2.178 on 17 and 202 DF,  p-value: 0.005795
library(ggplot2)

# Visualize the relationship between triage duration and other variables for triage_duration_model1 using a boxplot
ggplot(survey_data1, aes(x = time, y = Durationmins)) +
  geom_boxplot(fill = "skyblue", color = "darkblue") +
  labs(title = "Relationship between Time and Triage Duration (Model 1)",
       x = "Time",
       y = "Triage Duration (minutes)")

# Visualize the relationship between triage duration and other variables for triage_duration_model2 with improved color and clarity
ggplot(survey_data1, aes(x = age, y = Durationmins)) +
  geom_point(color = "darkred", alpha = 0.6) +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "Relationship between Age and Triage Duration (Model 2)",
       x = "Age",
       y = "Triage Duration (minutes)")
## `geom_smooth()` using formula = 'y ~ x'

# Visualize the relationship between triage duration and other variables for triage_duration_model3 with improved color and clarity
ggplot(survey_data1, aes(x = LOSdays, y = Durationmins)) +
  geom_point(color = "darkgreen", alpha = 0.6) +
  geom_smooth(method = "lm", color = "orange") +
  labs(title = "Relationship between LOS Days and Triage Duration (Model 3)",
       x = "LOS Days",
       y = "Triage Duration (minutes)")
## `geom_smooth()` using formula = 'y ~ x'

# Visualize the relationship between triage duration and other variables for triage_duration_model1 using a boxplot
ggplot(survey_data1, aes(x = treat_control, y = Durationmins)) +
  geom_boxplot(fill = "skyblue", color = "darkblue") +
  labs(title = "Relationship between treat_control and Triage Duration (Model 4)",
       x = "treat_control",
       y = "Triage Duration (minutes)")

#Patient outcomes in trauma patients presenting at EDs in Western Kenya
#ICU LoS
# Filter data for treatment and control groups
treatment_icu_los <- na.omit(survey_data1$ICU_LOS[survey_data1$treat_control == "treatment"])
control_icu_los <- na.omit(survey_data1$ICU_LOS[survey_data1$treat_control == "control"])

# Combine the ICU LOS data and group labels into a data frame
icu_los_data <- data.frame(
  ICU_LOS = c(treatment_icu_los, control_icu_los),
  Group = factor(rep(c("Treatment", "Control"), c(length(treatment_icu_los), length(control_icu_los))), levels = c("Treatment", "Control"))
)

# Create a box plot to visualize the distribution of ICU LOS by group
ggplot(icu_los_data, aes(x = Group, y = ICU_LOS, fill = Group)) +
  geom_boxplot() +
  labs(title = "Comparison of ICU Length of Stay between Treatment and Control Groups",
       x = "Group",
       y = "ICU Length of Stay (Days)",
       fill = "Group") +
  theme_minimal() +
  theme(legend.position = "none")

# Hospital LoS
# Filter data for treatment and control groups
treatment_hospital_los <- na.omit(survey_data1$LOSdays[survey_data1$treat_control == "treatment"])
control_hospital_los <- na.omit(survey_data1$LOSdays[survey_data1$treat_control == "control"])

# Combine the Hospital LOS data and group labels into a data frame
hospital_los_data <- data.frame(
  LOSdays = c(treatment_hospital_los, control_hospital_los),
  Group = factor(rep(c("Treatment", "Control"), c(length(treatment_hospital_los), length(control_hospital_los))), levels = c("Treatment", "Control"))
)

# Create a box plot to visualize the distribution of Hospital LOS by group
ggplot(hospital_los_data, aes(x = Group, y = LOSdays, fill = Group)) +
  geom_boxplot() +
  labs(title = "Comparison of Hospital Length of Stay between Treatment and Control Groups",
       x = "Group",
       y = "Hospital Length of Stay (Days)",
       fill = "Group") +
  theme_minimal() +
  theme(legend.position = "none")

#To test the effect of MTCP at EDs in Western Kenya (association)
# Perform t-test for ICU LOS
t_test_icu_los <- t.test(survey_data1$ICU_LOS ~ survey_data1$treat_control)

# Print the result
print(t_test_icu_los)
## 
##  Welch Two Sample t-test
## 
## data:  survey_data1$ICU_LOS by survey_data1$treat_control
## t = 3.3028, df = 215.89, p-value = 0.00112
## alternative hypothesis: true difference in means between group control and group treatment is not equal to 0
## 95 percent confidence interval:
##  0.7221408 2.8596774
## sample estimates:
##   mean in group control mean in group treatment 
##                7.190909                5.400000
# Create an interaction term between treatment and time
survey_data1$treat_control_time <- interaction(survey_data1$treat_control, survey_data1$time)

# Fit DiD regression model
did_model <- lm(LOSdays ~ treat_control + time + treat_control_time, data = survey_data1)

# Print the summary
summary(did_model)
## 
## Call:
## lm(formula = LOSdays ~ treat_control + time + treat_control_time, 
##     data = survey_data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.453  -7.239   0.030   6.453  41.298 
## 
## Coefficients: (2 not defined because of singularities)
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            22.239      1.888  11.778   <2e-16 ***
## treat_controltreatment                 -5.906      2.379  -2.483   0.0138 *  
## timeendline                             2.214      2.476   0.894   0.3721    
## treat_control_timetreatment.baseline    7.369      3.479   2.118   0.0353 *  
## treat_control_timecontrol.endline          NA         NA      NA       NA    
## treat_control_timetreatment.endline        NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.81 on 216 degrees of freedom
## Multiple R-squared:  0.03155,    Adjusted R-squared:  0.0181 
## F-statistic: 2.346 on 3 and 216 DF,  p-value: 0.07382
# Fit DiD regression model
did_model <- lm(LOSdays ~ treat_control + time + treat_control_time, data = survey_data1)

# Extract coefficients from the model
coefficients <- coef(did_model)

# Extract standard errors of coefficients
standard_errors <- summary(did_model)$coefficients[, "Std. Error"]

# Calculate Cohen's d for the treatment effect
treatment_effect <- coefficients["treat_controltreatment"]  # Coefficient for the treatment variable
standard_error_treatment <- standard_errors["treat_controltreatment"]  # Standard error for the treatment coefficient
n <- nrow(survey_data1)  # Sample size

cohen_d <- treatment_effect / standard_error_treatment

# Print Cohen's d
print(cohen_d)
## treat_controltreatment 
##              -2.483036
library(ggplot2)

# Visualize the change in outcome variables over time for the treatment and control groups
ggplot(survey_data1, aes(x = time, y = LOSdays, color = factor(treat_control))) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(title = "Change in LOS Days over Time by Treatment Group",
       x = "Time",
       y = "LOS Days",
       color = "Treatment Group") +
  scale_color_manual(values = c("blue", "red"))  # Customizing colors for treatment and control groups
## `geom_smooth()` using formula = 'y ~ x'

# Perform Difference-in-Differences (DiD) analysis
did_model <- lm(LOSdays ~ treat_control * time, data = survey_data1)

# Print the summary
summary(did_model)
## 
## Call:
## lm(formula = LOSdays ~ treat_control * time, data = survey_data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.453  -7.239   0.030   6.453  41.298 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                          22.239      1.888  11.778   <2e-16 ***
## treat_controltreatment                1.463      2.538   0.576   0.5651    
## timeendline                           2.214      2.476   0.894   0.3721    
## treat_controltreatment:timeendline   -7.369      3.479  -2.118   0.0353 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.81 on 216 degrees of freedom
## Multiple R-squared:  0.03155,    Adjusted R-squared:  0.0181 
## F-statistic: 2.346 on 3 and 216 DF,  p-value: 0.07382
# Calculate effect sizes and perform inferential analysis
# Extract coefficients from the model
coefficients <- coef(did_model)

# Extract standard errors of coefficients
standard_errors <- summary(did_model)$coefficients[, "Std. Error"]

# Calculate Cohen's d for the treatment effect
treatment_effect <- coefficients["treat_controlTRUE:time1"] - coefficients["treat_controlTRUE:time0"]  
# Coefficient for the treatment*time interaction
standard_error_treatment <- standard_errors["treat_controlTRUE:time1"] + standard_errors["treat_controlTRUE:time0"]
# Sum of standard errors for the treatment*time interaction

n <- nrow(survey_data1)  # Sample size

cohen_d <- treatment_effect / standard_error_treatment

# Print Cohen's d
print(paste("Cohen's d for treatment effect:", round(cohen_d, 2)))
## [1] "Cohen's d for treatment effect: NA"
library(ggplot2)
library(dplyr)

# Visualize the change in LOS over time by treatment group
ggplot(survey_data1, aes(x = time, y = LOSdays, color = factor(treat_control))) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~treat_control, ncol = 1) +  # Separate plots for treatment and control groups
  labs(title = "Change in Length of Stay (LOS) Over Time by Treatment Group",
       x = "Time",
       y = "Length of Stay (LOS) Days",
       color = "Treatment Group") +
  scale_color_manual(values = c("blue", "red")) +  # Customizing colors for treatment and control groups
  theme_minimal()  # Applying a minimal theme for better clarity
## `geom_smooth()` using formula = 'y ~ x'

# Perform Difference-in-Differences (DiD) analysis
did_model <- lm(LOSdays ~ treat_control * time, data = survey_data1)

# Print the summary
summary(did_model)
## 
## Call:
## lm(formula = LOSdays ~ treat_control * time, data = survey_data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.453  -7.239   0.030   6.453  41.298 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                          22.239      1.888  11.778   <2e-16 ***
## treat_controltreatment                1.463      2.538   0.576   0.5651    
## timeendline                           2.214      2.476   0.894   0.3721    
## treat_controltreatment:timeendline   -7.369      3.479  -2.118   0.0353 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.81 on 216 degrees of freedom
## Multiple R-squared:  0.03155,    Adjusted R-squared:  0.0181 
## F-statistic: 2.346 on 3 and 216 DF,  p-value: 0.07382
# Calculate effect sizes and perform inferential analysis
# Extract coefficients from the model
coefficients <- coef(did_model)

# Extract standard errors of coefficients
standard_errors <- summary(did_model)$coefficients[, "Std. Error"]

# Calculate t-values and p-values for treatment*time interaction
t_values <- coefficients["treat_controlTRUE:time1"] / standard_errors["treat_controlTRUE:time1"]
p_values <- 2 * pt(abs(t_values), df = nrow(survey_data1) - length(coefficients))

# Print t-values and p-values
print("Treatment*time Interaction:")
## [1] "Treatment*time Interaction:"
print(paste("t-value:", round(t_values, 2)))
## [1] "t-value: NA"
print(paste("p-value:", round(p_values, 4)))
## [1] "p-value: NA"
# Check assumptions of DiD model
# Residuals vs Fitted plot
plot(residuals(did_model), fitted(did_model),
     xlab = "Fitted values", ylab = "Residuals",
     main = "Residuals vs Fitted Plot for DiD Model")
abline(h = 0, col = "red")

# Normal Q-Q plot
qqnorm(residuals(did_model))
qqline(residuals(did_model))

# Shapiro-Wilk test for normality of residuals
shapiro.test(residuals(did_model))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(did_model)
## W = 0.94552, p-value = 2.375e-07
# Refinement of the DiD Model
did_model_refined <- lm(LOSdays ~ treat_control * time + ICU_LOS + HRinitial + SBPinitial + DBPinitial + airwayinterven + bleedingcontrol + Edduration_mins + HRdispo + RRdispo, data = survey_data1)

# Check model summary
summary(did_model_refined)
## 
## Call:
## lm(formula = LOSdays ~ treat_control * time + ICU_LOS + HRinitial + 
##     SBPinitial + DBPinitial + airwayinterven + bleedingcontrol + 
##     Edduration_mins + HRdispo + RRdispo, data = survey_data1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.4324  -4.9947  -0.0152   4.5810  22.7349 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        -17.91500    7.81601  -2.292 0.022947 *  
## treat_controltreatment               2.57852    1.59314   1.619 0.107135    
## timeendline                          0.72714    1.52054   0.478 0.633025    
## ICU_LOS                              2.75372    0.17637  15.613  < 2e-16 ***
## HRinitial                            0.06671    0.04373   1.526 0.128713    
## SBPinitial                           0.10619    0.04609   2.304 0.022268 *  
## DBPinitial                           0.15292    0.06710   2.279 0.023736 *  
## airwayintervennone                   8.29011    2.38174   3.481 0.000615 ***
## airwayintervenposition               3.89525    3.86928   1.007 0.315296    
## airwayintervensuction                2.73867    2.58292   1.060 0.290293    
## airwayintervensuction, ETT           0.90494    8.43627   0.107 0.914684    
## bleedingcontrolbandage/ IVF         -4.29115    8.01787  -0.535 0.593111    
## bleedingcontroldirect pressure      -3.14696    4.70375  -0.669 0.504251    
## bleedingcontrolIVF                  -0.50744    2.48992  -0.204 0.838719    
## bleedingcontrolN/A                   3.89298    2.87174   1.356 0.176758    
## bleedingcontrolnone                  1.13561    1.96699   0.577 0.564367    
## bleedingcontrolpressure             -2.17828    1.77680  -1.226 0.221663    
## Edduration_mins                     -0.08305    0.01854  -4.479 1.26e-05 ***
## HRdispo                             -0.07037    0.03978  -1.769 0.078399 .  
## RRdispo                             -0.19056    0.15729  -1.211 0.227147    
## treat_controltreatment:timeendline  -0.61169    2.20302  -0.278 0.781561    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.8 on 199 degrees of freedom
## Multiple R-squared:  0.669,  Adjusted R-squared:  0.6357 
## F-statistic: 20.11 on 20 and 199 DF,  p-value: < 2.2e-16
# Additional Tests - Bootstrap
library(boot)
## 
## Attaching package: 'boot'
## The following object is masked from 'package:lattice':
## 
##     melanoma
## The following object is masked from 'package:psych':
## 
##     logit
## The following object is masked from 'package:survival':
## 
##     aml
# Define function to estimate treatment effect in DiD model
did_estimate <- function(data, indices) {
  subset <- data[indices, ]
  model <- lm(LOSdays ~ treat_control * time, data = subset)
  treatment_effect <- coef(model)["treat_controltreatment:timeendline"]
  return(treatment_effect)
}
# Perform bootstrap
boot_results <- boot(data = survey_data1, statistic = did_estimate, R = 1000)
# Get bootstrap confidence intervals
boot_ci <- boot.ci(boot_results, type = "bca")

# Print bootstrap confidence intervals
boot_ci
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = boot_results, type = "bca")
## 
## Intervals : 
## Level       BCa          
## 95%   (-13.916,  -0.392 )  
## Calculations and Intervals on Original Scale
# Sensitivity Analysis (e.g., controlling for additional covariates, subgroup analysis)

# Visualizations
# Treatment effect over time
# Calculate mean LOS and confidence intervals for each group at each time point
library(dplyr)
library(tidyr)

mean_ci <- survey_data1 %>%
  group_by(time, treat_control) %>%
  summarise(mean_LOS = mean(LOSdays), 
            lower_ci = mean(LOSdays) - qt(0.975, length(LOSdays) - 1) * (sd(LOSdays) / sqrt(length(LOSdays))), 
            upper_ci = mean(LOSdays) + qt(0.975, length(LOSdays) - 1) * (sd(LOSdays) / sqrt(length(LOSdays)))) %>%
  ungroup()
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Plot treatment effect over time with error bars using bar plot
ggplot(mean_ci, aes(x = time, y = mean_LOS, fill = treat_control)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0.2, position = position_dodge(width = 0.9)) +
  labs(title = "Treatment Effect Over Time",
       x = "Time",
       y = "Mean Length of Stay",
       fill = "Group") +
  theme_minimal()

# Balance plot for covariates
covariate_balance_plot <- function(data, covariate) {
  ggplot(data, aes(x = treat_control, y = !!sym(covariate), fill = treat_control)) +
    geom_boxplot() +
    labs(title = paste("Balance Plot for", covariate),
         x = "Group",
         y = covariate,
         fill = "Group") +
    theme_minimal()
}

covariate_balance_plot(survey_data1, "ICU_LOS")

# Distribution of outcome variable
ggplot(survey_data1, aes(x = LOSdays, fill = treat_control)) +
  geom_histogram(binwidth = 1, position = "identity", alpha = 0.5) +
  labs(title = "Distribution of Length of Stay",
       x = "Length of Stay",
       y = "Frequency",
       fill = "Group") +
  theme_minimal()

options(repos = c(CRAN = "https://cran.r-project.org"))