# Load necessary libraries
library(survey)      # For handling complex survey design
## Warning: package 'survey' was built under R version 4.3.3
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(mice)        # For multiple imputation of missing data
## Warning: package 'mice' was built under R version 4.3.3
## 
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
## 
##     filter
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(dplyr)       # For data manipulation
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)     # For data visualization
## Warning: package 'ggplot2' was built under R version 4.3.3
library(psych)       # For reliability analysis
## Warning: package 'psych' was built under R version 4.3.3
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(caret)       # For resampling and imbalanced data handling
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 4.3.3
## 
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
## 
##     cluster
library(knitr)       # For dynamic report generation
## Warning: package 'knitr' was built under R version 4.3.3
library(lattice)     # For advanced data visualization
library(tidyverse)   # For data manipulation and visualization
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%()    masks ggplot2::%+%()
## ✖ psych::alpha()  masks ggplot2::alpha()
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks mice::filter(), stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ purrr::lift()   masks caret::lift()
## ✖ tidyr::pack()   masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(likert)      # For Likert scale analysis
## Warning: package 'likert' was built under R version 4.3.3
## Loading required package: xtable
## Warning: package 'xtable' was built under R version 4.3.3
## 
## Attaching package: 'likert'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
library(MASS)        # For advanced statistical analysis
## Warning: package 'MASS' was built under R version 4.3.3
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(viridis)     # For color-blind friendly visualization
## Warning: package 'viridis' was built under R version 4.3.3
## Loading required package: viridisLite
## Warning: package 'viridisLite' was built under R version 4.3.3
library(here)        # For managing file paths
## Warning: package 'here' was built under R version 4.3.3
## here() starts at C:/nick/practice
library(flextable)   # For creating flexible tables
## Warning: package 'flextable' was built under R version 4.3.3
## 
## Attaching package: 'flextable'
## 
## The following object is masked from 'package:xtable':
## 
##     align
## 
## The following object is masked from 'package:purrr':
## 
##     compose
library(devtools)    # For installing and managing packages from GitHub
## Warning: package 'devtools' was built under R version 4.3.3
## Loading required package: usethis
## Warning: package 'usethis' was built under R version 4.3.3
library(GPArotation) # For factor rotation
## Warning: package 'GPArotation' was built under R version 4.3.3
## 
## Attaching package: 'GPArotation'
## 
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin
library(ufs)         # For user-friendly science
# Import the CSV file into R
survey_data <- read.csv("C:/nick/practice/traumadata/traumaData.csv")

# View the structure of the imported data
str(survey_data)
## 'data.frame':    256 obs. of  53 variables:
##  $ code             : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ county           : chr  "Migori" "Migori" "Migori" "Migori" ...
##  $ time             : chr  "endline" "endline" "endline" "endline" ...
##  $ group            : chr  "control" "control" "control" "control" ...
##  $ age              : int  30 38 34 28 32 41 22 29 33 45 ...
##  $ gender           : chr  "male" "male" "female" "female" ...
##  $ mstatus          : chr  "married" "married" "married" "single" ...
##  $ edulevel         : chr  "secondary" "secondary" "secondary" "college" ...
##  $ occup            : chr  "mason" "tailor" "business" "business" ...
##  $ dateinjury       : chr  "20/8/2022" "15/8/2022" "2/9/2022" "8/9/2022" ...
##  $ timeinj          : chr  "1500hrs" "1000hrs" "1300hrs" "0900hrs" ...
##  $ MOI              : chr  "fall" "rti" "rti" "stab" ...
##  $ anatlocation     : chr  "head" "chest" "extremities, pelvis" "chest" ...
##  $ ISS              : int  16 15 15 15 15 15 15 16 15 16 ...
##  $ admdate          : chr  "20/8/2022" "15/8/2022" "2/9/2022" "8/9/2022" ...
##  $ arrivaltime      : chr  "1530hrs" "1040hrs" "1400hrs" "1000hrs" ...
##  $ arrivalmode      : chr  "car" "car" "motorbike" "taxi" ...
##  $ refby            : chr  "self" "self" "self" "nurse" ...
##  $ receiveby        : chr  "rco" "nurse" "nurse" "mo" ...
##  $ triagecategory   : chr  "urgent" "urgent" "urgent" "immediate" ...
##  $ timeassmnt       : chr  "1550hrs" "1055hrs" "1430hrs" "1025hrs" ...
##  $ Durationmins     : int  20 15 30 25 20 25 30 30 30 40 ...
##  $ state            : chr  "alive" "alive" "alive" "alive" ...
##  $ HRinitial        : int  80 88 75 90 80 95 90 90 86 105 ...
##  $ SBPinitial       : int  120 130 125 130 120 125 115 110 130 110 ...
##  $ DBPinitial       : int  80 76 80 80 75 74 78 80 60 70 ...
##  $ RRinitial        : int  20 18 22 20 22 21 18 18 22 20 ...
##  $ SPO2initial      : int  94 93 90 95 95 93 94 94 90 95 ...
##  $ painscore        : int  10 10 10 9 10 10 8 9 10 10 ...
##  $ GCS              : int  14 13 15 15 13 12 15 13 14 15 ...
##  $ AVPU             : chr  "a" "v" "a" "a" ...
##  $ airwaystatus     : chr  "patent" "patent" "patent" "patent" ...
##  $ airwayinterven   : chr  "none" "none" "none" "none" ...
##  $ spinestabilized  : chr  "no" "no" "no" "no" ...
##  $ breathingstatus  : chr  "spont" "spont" "spont" "spont" ...
##  $ breathinginterven: chr  "none" "none" "none" "none" ...
##  $ bleedingcontrol  : chr  "IVF" "bandage" "pressure" "bandage" ...
##  $ Fluidadmin       : chr  "NS 500mls" "none" "NS 500mls" "NS 500mls" ...
##  $ meds             : chr  "tramadol" "tramadol" "tramadol" "analgesics" ...
##  $ Eddeparture      : chr  "1730hrs" "1200hrs" "1600hrs" "1110hrs" ...
##  $ Edduration_mins  : int  100 65 90 45 50 125 100 80 120 130 ...
##  $ HRdispo          : int  95 78 95 87 78 96 95 60 95 70 ...
##  $ SBPdispo         : int  93 112 105 127 130 110 105 100 133 120 ...
##  $ DBPdispo         : int  70 80 85 69 78 70 80 55 80 70 ...
##  $ RRdispo          : int  20 16 21 19 16 15 18 18 21 20 ...
##  $ SPO2dispo        : int  95 95 93 94 94 93 95 93 94 95 ...
##  $ Admit            : chr  "icu" "ward" "ward" "ward" ...
##  $ hospdischarge    : chr  "21/9/2022" "14/9/2022" "2/10/2022" "23/9/2022" ...
##  $ LOSdays          : int  30 29 30 15 17 22 26 34 29 30 ...
##  $ state.1          : chr  "stable" "stable" "stable" "stable" ...
##  $ outcome          : chr  "alive" "alive" "alive" "alive" ...
##  $ ICU_LOS          : int  14 NA NA NA NA 10 NA NA NA NA ...
##  $ ward_LOS         : int  16 NA NA NA NA 12 NA NA NA NA ...
# View the first few rows of the data
head(survey_data)
##   code county    time   group age gender mstatus  edulevel         occup
## 1    1 Migori endline control  30   male married secondary         mason
## 2    2 Migori endline control  38   male married secondary        tailor
## 3    3 Migori endline control  34 female married secondary      business
## 4    4 Migori endline control  28 female  single   college      business
## 5    5 Migori endline control  32   male married secondary        driver
## 6    6 Migori endline control  41   male married   college administrator
##   dateinjury timeinj  MOI        anatlocation ISS   admdate arrivaltime
## 1  20/8/2022 1500hrs fall                head  16 20/8/2022     1530hrs
## 2  15/8/2022 1000hrs  rti               chest  15 15/8/2022     1040hrs
## 3   2/9/2022 1300hrs  rti extremities, pelvis  15  2/9/2022     1400hrs
## 4   8/9/2022 0900hrs stab               chest  15  8/9/2022     1000hrs
## 5  13/9/2022 2000hrs  rti                head  15 13/9/2022     2030hrs
## 6  17/9/2022 1000hrs fall          head, face  15 17/9/2022     1100hrs
##   arrivalmode refby receiveby triagecategory timeassmnt Durationmins state
## 1         car  self       rco         urgent    1550hrs           20 alive
## 2         car  self     nurse         urgent    1055hrs           15 alive
## 3   motorbike  self     nurse         urgent    1430hrs           30 alive
## 4        taxi nurse        mo      immediate    1025hrs           25 alive
## 5   motorbike  self     nurse         urgent    2300hrs           20 alive
## 6   motorbike  self     nurse      immediate    1325hrs           25 alive
##   HRinitial SBPinitial DBPinitial RRinitial SPO2initial painscore GCS AVPU
## 1        80        120         80        20          94        10  14    a
## 2        88        130         76        18          93        10  13    v
## 3        75        125         80        22          90        10  15    a
## 4        90        130         80        20          95         9  15    a
## 5        80        120         75        22          95        10  13    a
## 6        95        125         74        21          93        10  12    p
##   airwaystatus airwayinterven spinestabilized breathingstatus breathinginterven
## 1       patent           none              no           spont              none
## 2       patent           none              no           spont              none
## 3       patent           none              no           spont              none
## 4       patent           none              no           spont              none
## 5       patent           none              no           spont              none
## 6   obstructed           none              no    supplemental              none
##   bleedingcontrol Fluidadmin       meds Eddeparture Edduration_mins HRdispo
## 1             IVF  NS 500mls   tramadol     1730hrs             100      95
## 2         bandage       none   tramadol     1200hrs              65      78
## 3        pressure  NS 500mls   tramadol     1600hrs              90      95
## 4         bandage  NS 500mls analgesics     1110hrs              45      87
## 5             IVF  NS 500mls   tramadol     2350hrs              50      78
## 6            none   mannitol   tramadol     1530hrs             125      96
##   SBPdispo DBPdispo RRdispo SPO2dispo Admit hospdischarge LOSdays state.1
## 1       93       70      20        95   icu     21/9/2022      30  stable
## 2      112       80      16        95  ward     14/9/2022      29  stable
## 3      105       85      21        93  ward     2/10/2022      30  stable
## 4      127       69      19        94  ward     23/9/2022      15  stable
## 5      130       78      16        94  ward     30/9/2022      17  stable
## 6      110       70      15        93   icu     9/10/2022      22  stable
##   outcome ICU_LOS ward_LOS
## 1   alive      14       16
## 2   alive      NA       NA
## 3   alive      NA       NA
## 4   alive      NA       NA
## 5   alive      NA       NA
## 6   alive      10       12
# Load the dplyr library
library(dplyr)

# Convert date columns to Date format
survey_data <- survey_data %>%
  mutate(dateinjury = as.Date(dateinjury),
         admdate = as.Date(admdate),
         hospdischarge = as.Date(hospdischarge))
#summary of demographics
library(dplyr)

# Read the survey data from the CSV file
survey_data1 <- read.csv("survey_data1.csv")

# Summarize variables by time and treat_control
summary_age <- survey_data1 %>%
  group_by(time, treat_control) %>%
  summarize(
    mean_age = mean(age, na.rm = TRUE),
  )
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_age
## # A tibble: 4 × 3
## # Groups:   time [2]
##   time     treat_control mean_age
##   <chr>    <chr>            <dbl>
## 1 baseline control           39.0
## 2 baseline treatment         40.0
## 3 endline  control           41.9
## 4 endline  treatment         39.3
summary_gender <- survey_data1 %>%
  group_by(time, treat_control) %>%
  summarize(
    gender_count_male = sum(gender == "male", na.rm = TRUE),
    gender_count_female = sum(gender == "female", na.rm = TRUE)
  )
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_gender
## # A tibble: 4 × 4
## # Groups:   time [2]
##   time     treat_control gender_count_male gender_count_female
##   <chr>    <chr>                     <int>               <int>
## 1 baseline control                      26                  20
## 2 baseline treatment                    33                  24
## 3 endline  control                      37                  27
## 4 endline  treatment                    33                  20
summary_marital <- survey_data1 %>%
  group_by(time, treat_control) %>%
  summarize(
    mstatus_single = sum(mstatus == "single", na.rm = TRUE),
    mstatus_married = sum(mstatus == "married", na.rm = TRUE),
    mstatus_divorced = sum(mstatus == "divorced", na.rm = TRUE)
      )
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_marital
## # A tibble: 4 × 5
## # Groups:   time [2]
##   time     treat_control mstatus_single mstatus_married mstatus_divorced
##   <chr>    <chr>                  <int>           <int>            <int>
## 1 baseline control                   16              30                0
## 2 baseline treatment                 17              39                0
## 3 endline  control                   11              53                0
## 4 endline  treatment                 15              37                0
summary_educ <- survey_data1 %>%
  group_by(time, treat_control) %>%
  summarize(
    edulevel_primary = sum(edulevel == "primary", na.rm = TRUE),
    edulevel_secondary = sum(edulevel == "secondary", na.rm = TRUE),
    edulevel_tertiary = sum(edulevel == "tertiary", na.rm = TRUE)
  )
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_educ
## # A tibble: 4 × 5
## # Groups:   time [2]
##   time     treat_control edulevel_primary edulevel_secondary edulevel_tertiary
##   <chr>    <chr>                    <int>              <int>             <int>
## 1 baseline control                     18                 20                 0
## 2 baseline treatment                   25                 19                 0
## 3 endline  control                     16                 34                 0
## 4 endline  treatment                   10                 31                 0
summary_occup <- survey_data1 %>%
  group_by(time, treat_control) %>%
  summarize(  total_occupations = n_distinct(occup),
    most_common_occupations = toString(head(names(sort(table(occup), decreasing = TRUE)), 3))
  )
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_occup
## # A tibble: 4 × 4
## # Groups:   time [2]
##   time     treat_control total_occupations most_common_occupations     
##   <chr>    <chr>                     <int> <chr>                       
## 1 baseline control                       9 business, student, farmer   
## 2 baseline treatment                     9 business, student, farmer   
## 3 endline  control                      24 business, student, housewife
## 4 endline  treatment                    21 business, student, farmer
#Summary of MOI and anatlocation
summary_MOI <- survey_data1 %>%
  group_by(time, treat_control) %>%
  summarize(  total_MOI = n_distinct(MOI),
    most_common_MOI = toString(head(names(sort(table(MOI), decreasing = TRUE)), 3))
  )
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_MOI
## # A tibble: 4 × 4
## # Groups:   time [2]
##   time     treat_control total_MOI most_common_MOI   
##   <chr>    <chr>             <int> <chr>             
## 1 baseline control              11 rti, stab, assault
## 2 baseline treatment             5 rti, fall, stab   
## 3 endline  control              10 rti, fall, stab   
## 4 endline  treatment             9 rti, fall, stab
summary_anatlocation <- survey_data1 %>%
  group_by(time, treat_control) %>%
  summarize(  total_anatlocation = n_distinct(anatlocation),
    most_common_anatlocation = toString(head(names(sort(table(anatlocation), decreasing = TRUE)), 3))
  )
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
# Display the summary table
summary_anatlocation
## # A tibble: 4 × 4
## # Groups:   time [2]
##   time     treat_control total_anatlocation most_common_anatlocation            
##   <chr>    <chr>                      <int> <chr>                               
## 1 baseline control                       15 head, chest, extremities, extremiti…
## 2 baseline treatment                     19 head, extremities, chest            
## 3 endline  control                       21 head, chest, extremities            
## 4 endline  treatment                     21 head, chest, chest, extremities
#Impute missing values in the numeric variables
# Load necessary library
library(mice)

# Define the variables with missing values
vars_with_missing <- c("HRinitial", "SBPinitial", "DBPinitial", "RRinitial", 
                       "SPO2initial", "painscore", "GCS", "Edduration_mins",
                       "HRdispo", "SBPdispo", "DBPdispo", "RRdispo", 
                       "SPO2dispo", "LOSdays", "ICU_LOS", "ward_LOS")

# Perform multiple imputation
imputed_data <- mice(survey_data1[, vars_with_missing], method = "pmm", m = 5)
## 
##  iter imp variable
##   1   1  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   1   2  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   1   3  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   1   4  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   1   5  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   2   1  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   2   2  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   2   3  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   2   4  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   2   5  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   3   1  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   3   2  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   3   3  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   3   4  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   3   5  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   4   1  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   4   2  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   4   3  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   4   4  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   4   5  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   5   1  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   5   2  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   5   3  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   5   4  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
##   5   5  painscore  HRdispo  SBPdispo  DBPdispo  RRdispo  SPO2dispo  ICU_LOS  ward_LOS
# Combine the imputed data with the original dataset
survey_data1_imputed <- complete(imputed_data)

# Show the imputed data
#survey_data1_imputed
# Replace missing variables in survey_data1 with imputed values
survey_data1[, vars_with_missing] <- survey_data1_imputed
#Factors influencing clinical management of trauma patients presenting at EDs in Western Kenya
# Load necessary libraries
library(ggplot2)
# Your analysis and visualization code here
# For example:
# Assess the relationship between triage duration and other variables
triage_duration_model <- lm(Durationmins ~ age + gender + mstatus + edulevel + occup, data = survey_data1)

# View summary of the model
summary(triage_duration_model)
## 
## Call:
## lm(formula = Durationmins ~ age + gender + mstatus + edulevel + 
##     occup, data = survey_data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.188  -7.202  -0.756   5.261  41.181 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              32.78702    9.20879   3.560 0.000474 ***
## age                       0.09340    0.08224   1.136 0.257594    
## gendermale               -1.43805    1.84184  -0.781 0.435964    
## mstatussingle             5.51792    2.85820   1.931 0.055109 .  
## mstatuswidowed           -3.33017    8.38297  -0.397 0.691651    
## edulevelprimary          -3.03107    3.35305  -0.904 0.367217    
## edulevelsecondary        -0.85068    2.94075  -0.289 0.772705    
## eduleveluniversity        9.10142    7.33904   1.240 0.216539    
## occupadministrator      -10.17836   14.20026  -0.717 0.474444    
## occupbar attendant      -23.15186   11.38145  -2.034 0.043401 *  
## occupbarber             -13.58048   13.88532  -0.978 0.329364    
## occupbodarider          -11.61064    9.86401  -1.177 0.240721    
## occupbusiness           -13.13453    8.16177  -1.609 0.109309    
## occupcasual labourer    -10.82233    8.60308  -1.258 0.210036    
## occupclerk              -26.73489   14.39089  -1.858 0.064836 .  
## occupcobbler             -8.36148   14.04165  -0.595 0.552273    
## occupdriver             -17.18975   10.34612  -1.661 0.098360 .  
## occupfarmer              -9.63668    8.40358  -1.147 0.253014    
## occupfishing            -19.98147   13.89058  -1.438 0.152031    
## occuphousewife          -10.53782    8.67261  -1.215 0.225931    
## occupmason              -14.23428   11.34061  -1.255 0.211048    
## occupmechanic           -23.30028   13.89742  -1.677 0.095358 .  
## occupnone                -8.47077    9.38078  -0.903 0.367737    
## occupnurse              -23.35121   14.09125  -1.657 0.099233 .  
## occupoffic assistant    -26.23428   13.87413  -1.891 0.060247 .  
## occuppastor             -27.27020   14.17270  -1.924 0.055915 .  
## occuppreacher           -13.92187   14.07256  -0.989 0.323848    
## occuppubic servant      -18.72480   14.09095  -1.329 0.185579    
## occuprtd public servant -27.22306   14.12489  -1.927 0.055515 .  
## occupsaleslady          -10.16285   14.16453  -0.717 0.474004    
## occupsalonist            14.22419   14.18812   1.003 0.317428    
## occupsecurity officer   -10.07654    8.91382  -1.130 0.259795    
## occupshop attendant     -20.29701   11.46869  -1.770 0.078458 .  
## occupstudent            -21.63462    8.80672  -2.457 0.014974 *  
## occuptailor             -17.35990   11.30397  -1.536 0.126359    
## occupteacher            -13.71254    8.77789  -1.562 0.120005    
## occuptechnician         -17.65419   12.18642  -1.449 0.149167    
## occuptrader              -4.15096   14.22686  -0.292 0.770799    
## occupvendor             -13.49349   14.06372  -0.959 0.338617    
## occupvet officer        -26.97992   13.91387  -1.939 0.054057 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.3 on 180 degrees of freedom
## Multiple R-squared:  0.171,  Adjusted R-squared:  -0.008645 
## F-statistic: 0.9519 on 39 and 180 DF,  p-value: 0.5565
# Visualize the relationship between triage duration and age
ggplot(survey_data1, aes(x = age, y = Durationmins)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "Relationship between Age and Triage Duration",
       x = "Age",
       y = "Triage Duration (minutes)")
## `geom_smooth()` using formula = 'y ~ x'

#Patient outcomes in trauma patients presenting at EDs in Western Kenya
#ICU LoS
# Filter data for treatment and control groups
treatment_icu_los <- na.omit(survey_data1$ICU_LOS[survey_data1$treat_control == "treatment"])
control_icu_los <- na.omit(survey_data1$ICU_LOS[survey_data1$treat_control == "control"])

# Combine the ICU LOS data and group labels into a data frame
icu_los_data <- data.frame(
  ICU_LOS = c(treatment_icu_los, control_icu_los),
  Group = factor(rep(c("Treatment", "Control"), c(length(treatment_icu_los), length(control_icu_los))), levels = c("Treatment", "Control"))
)

# Create a box plot to visualize the distribution of ICU LOS by group
ggplot(icu_los_data, aes(x = Group, y = ICU_LOS, fill = Group)) +
  geom_boxplot() +
  labs(title = "Comparison of ICU Length of Stay between Treatment and Control Groups",
       x = "Group",
       y = "ICU Length of Stay (Days)",
       fill = "Group") +
  theme_minimal() +
  theme(legend.position = "none")

# Hospital LoS
# Filter data for treatment and control groups
treatment_hospital_los <- na.omit(survey_data1$LOSdays[survey_data1$treat_control == "treatment"])
control_hospital_los <- na.omit(survey_data1$LOSdays[survey_data1$treat_control == "control"])

# Combine the Hospital LOS data and group labels into a data frame
hospital_los_data <- data.frame(
  LOSdays = c(treatment_hospital_los, control_hospital_los),
  Group = factor(rep(c("Treatment", "Control"), c(length(treatment_hospital_los), length(control_hospital_los))), levels = c("Treatment", "Control"))
)

# Create a box plot to visualize the distribution of Hospital LOS by group
ggplot(hospital_los_data, aes(x = Group, y = LOSdays, fill = Group)) +
  geom_boxplot() +
  labs(title = "Comparison of Hospital Length of Stay between Treatment and Control Groups",
       x = "Group",
       y = "Hospital Length of Stay (Days)",
       fill = "Group") +
  theme_minimal() +
  theme(legend.position = "none")

#To test the effect of MTCP at EDs in Western Kenya (association)
# Perform t-test for ICU LOS
t_test_icu_los <- t.test(survey_data1$ICU_LOS ~ survey_data1$treat_control)

# Print the result
print(t_test_icu_los)
## 
##  Welch Two Sample t-test
## 
## data:  survey_data1$ICU_LOS by survey_data1$treat_control
## t = 2.1208, df = 217.98, p-value = 0.03507
## alternative hypothesis: true difference in means between group control and group treatment is not equal to 0
## 95 percent confidence interval:
##  0.08352086 2.28011551
## sample estimates:
##   mean in group control mean in group treatment 
##                8.154545                6.972727
# Create an interaction term between treatment and time
survey_data1$treat_control_time <- interaction(survey_data1$treat_control, survey_data1$time)

# Fit DiD regression model
did_model <- lm(LOSdays ~ treat_control + time + treat_control_time, data = survey_data1)

# Print the summary
summary(did_model)
## 
## Call:
## lm(formula = LOSdays ~ treat_control + time + treat_control_time, 
##     data = survey_data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.453  -7.239   0.030   6.453  41.298 
## 
## Coefficients: (2 not defined because of singularities)
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            22.239      1.888  11.778   <2e-16 ***
## treat_controltreatment                 -5.906      2.379  -2.483   0.0138 *  
## timeendline                             2.214      2.476   0.894   0.3721    
## treat_control_timetreatment.baseline    7.369      3.479   2.118   0.0353 *  
## treat_control_timecontrol.endline          NA         NA      NA       NA    
## treat_control_timetreatment.endline        NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.81 on 216 degrees of freedom
## Multiple R-squared:  0.03155,    Adjusted R-squared:  0.0181 
## F-statistic: 2.346 on 3 and 216 DF,  p-value: 0.07382
# Fit DiD regression model
did_model <- lm(LOSdays ~ treat_control + time + treat_control_time, data = survey_data1)

# Extract coefficients from the model
coefficients <- coef(did_model)

# Extract standard errors of coefficients
standard_errors <- summary(did_model)$coefficients[, "Std. Error"]

# Calculate Cohen's d for the treatment effect
treatment_effect <- coefficients["treat_controltreatment"]  # Coefficient for the treatment variable
standard_error_treatment <- standard_errors["treat_controltreatment"]  # Standard error for the treatment coefficient
n <- nrow(survey_data1)  # Sample size

cohen_d <- treatment_effect / standard_error_treatment

# Print Cohen's d
print(cohen_d)
## treat_controltreatment 
##              -2.483036
options(repos = c(CRAN = "https://cran.r-project.org"))