library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(tidyverse)
library(readxl)
library(psych)
library(knitr)
library(broom)
library(gtsummary)
library(corrplot)
## corrplot 0.95 loaded
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Load the "All_Data" sheet of the capstone workbook and, in the same
# pipeline, append a numeric Bexar indicator: 1 when agency_county is
# "BEXAR", 0 for any other county (a missing county stays NA).
countyjail_data <- read_excel(
  path = "~/Desktop/capstone bexar county jail/excel work sheets and r code/TXJail_DeathsCounties2015_2025 Capstone analysis.xlsx",
  sheet = "All_Data"
) %>%
  mutate(Bexar = as.numeric(agency_county == "BEXAR"))
library(gtsummary)
library(gtsummary)

# Descriptive statistics for the outcome and every model covariate.
# Continuous variables are reported as mean (SD) to two decimals,
# categorical variables as n (%). Row labels are bolded.
countyjail_data %>%
  select(
    preventable_death,
    housing_single_cell,
    mh_yes,
    suicidal_yes,
    sex_male,
    race_black,
    race_hispanic,
    age_at_time_of_death,
    Bexar
  ) %>%
  tbl_summary(
    label = list(
      preventable_death    = "Preventable Death",
      housing_single_cell  = "Single Cell Housing",
      mh_yes               = "Mental Health Flag",
      suicidal_yes         = "Suicidal Ideation",
      sex_male             = "Male",
      race_black           = "Black",
      race_hispanic        = "Hispanic",
      age_at_time_of_death = "Age",
      Bexar                = "Bexar County"
    ),
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = list(all_continuous() ~ 2)
  ) %>%
  bold_labels()
Characteristic N = 390¹
Preventable Death 64 (16%)
Single Cell Housing 99 (25%)
Mental Health Flag 70 (18%)
Suicidal Ideation 28 (7.2%)
Male 346 (89%)
Black 155 (40%)
Hispanic 85 (22%)
Age 46.92 (14.89)
Bexar County 119 (31%)
¹ n (%); Mean (SD)
library(corrplot)
library(gtsummary)

# NOTE(review): this chunk renders the same descriptive table as the
# tbl_summary() chunk earlier in this file (same variables, statistics and
# labels); confirm whether both copies are needed in the report.
# Summarises the outcome and covariates: mean (SD) with two decimals for
# continuous variables, n (%) for categorical ones, with bold row labels.
tbl_summary(
  countyjail_data,
  include = c(
    preventable_death, housing_single_cell, mh_yes, suicidal_yes,
    sex_male, race_black, race_hispanic, age_at_time_of_death, Bexar
  ),
  statistic = list(
    all_continuous() ~ "{mean} ({sd})",
    all_categorical() ~ "{n} ({p}%)"
  ),
  digits = list(all_continuous() ~ 2),
  label = list(
    preventable_death    = "Preventable Death",
    housing_single_cell  = "Single Cell Housing",
    mh_yes               = "Mental Health Flag",
    suicidal_yes         = "Suicidal Ideation",
    sex_male             = "Male",
    race_black           = "Black",
    race_hispanic        = "Hispanic",
    age_at_time_of_death = "Age",
    Bexar                = "Bexar County"
  )
) %>%
  bold_labels()
Characteristic N = 390¹
Preventable Death 64 (16%)
Single Cell Housing 99 (25%)
Mental Health Flag 70 (18%)
Suicidal Ideation 28 (7.2%)
Male 346 (89%)
Black 155 (40%)
Hispanic 85 (22%)
Age 46.92 (14.89)
Bexar County 119 (31%)
¹ n (%); Mean (SD)
library(corrplot)

# Keep only the outcome and model covariates, dropping every row that has a
# missing value in any of them, so the correlations use complete cases.
cor_vars <- na.omit(
  select(
    countyjail_data,
    preventable_death, housing_single_cell, mh_yes, suicidal_yes,
    sex_male, race_black, race_hispanic, age_at_time_of_death, Bexar
  )
)

# Pairwise Pearson correlations between all retained columns.
cor_matrix <- cor(cor_vars, method = "pearson")

# Upper-triangle colour heatmap with the coefficients printed in each cell.
# The top margin is widened to 1 so the title has room above the plot.
corrplot(
  cor_matrix,
  type = "upper",
  method = "color",
  addCoef.col = "purple",
  number.cex = 0.7,
  tl.cex = 0.8,
  mar = c(0, 0, 1, 0),
  title = "Correlation Matrix - Custodial Death Variables"
)

# Logistic regression (binomial family) of preventable_death on housing,
# mental-health, demographic and age covariates plus the Bexar indicator.
model <- glm(
  formula = preventable_death ~
    housing_single_cell + mh_yes + suicidal_yes +
    sex_male + race_black + race_hispanic +
    age_at_time_of_death + Bexar,
  family = "binomial",
  data = countyjail_data
)

# Print coefficients (log-odds scale), standard errors and fit statistics.
summary(model)
## 
## Call:
## glm(formula = preventable_death ~ housing_single_cell + mh_yes + 
##     suicidal_yes + sex_male + race_black + race_hispanic + age_at_time_of_death + 
##     Bexar, family = "binomial", data = countyjail_data)
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           1.23862    0.76230   1.625 0.104198    
## housing_single_cell   0.94777    0.33268   2.849 0.004388 ** 
## mh_yes               -0.04381    0.48404  -0.091 0.927887    
## suicidal_yes          1.70008    0.62316   2.728 0.006369 ** 
## sex_male             -0.09354    0.47275  -0.198 0.843157    
## race_black           -1.46962    0.40916  -3.592 0.000328 ***
## race_hispanic        -0.40401    0.38346  -1.054 0.292063    
## age_at_time_of_death -0.07522    0.01332  -5.645 1.65e-08 ***
## Bexar                 1.19484    0.35211   3.393 0.000690 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 348.20  on 389  degrees of freedom
## Residual deviance: 261.14  on 381  degrees of freedom
## AIC: 279.14
## 
## Number of Fisher Scoring iterations: 6
library(kableExtra)
library(broom)
library(kableExtra)

# Odds-ratio table for `model`: exponentiated coefficients with confidence
# limits and p-values, rounded to three decimals for display.
# select() both picks and renames the columns in a single step.
tidy(model, conf.int = TRUE, exponentiate = TRUE) %>%
  select(
    Variable = term,
    `Odds Ratio` = estimate,
    `95% CI Lower` = conf.low,
    `95% CI Upper` = conf.high,
    `P-Value` = p.value
  ) %>%
  kable(caption = "Logit Model: Odds Ratios for Preventable Death", digits = 3) %>%
  kable_styling()
Logit Model: Odds Ratios for Preventable Death
Variable Odds Ratio 95% CI Lower 95% CI Upper P-Value
(Intercept) 3.451 0.775 15.652 0.104
housing_single_cell 2.580 1.344 4.980 0.004
mh_yes 0.957 0.352 2.389 0.928
suicidal_yes 5.474 1.637 19.245 0.006
sex_male 0.911 0.372 2.407 0.843
race_black 0.230 0.099 0.499 0.000
race_hispanic 0.668 0.309 1.400 0.292
age_at_time_of_death 0.928 0.902 0.951 0.000
Bexar 3.303 1.669 6.676 0.001
# Set Bexar as reference group.
# relevel() moves "BEXAR" to the first factor level (the baseline for glm's
# treatment contrasts) and keeps every other county present in the data.
# Hard-coding the full level list would silently turn any county not named
# there into NA, and glm() would then drop those rows from the fit.
# relevel() raises an error if "BEXAR" is absent, rather than fitting
# against an unintended baseline.
countyjail_data <- countyjail_data %>%
  mutate(agency_county = relevel(factor(agency_county), ref = "BEXAR"))

# All county logit model: same covariates as the Bexar-indicator model, but
# with one coefficient per non-Bexar county (each compared against Bexar).
model_allcounties <- glm(
  preventable_death ~ housing_single_cell + mh_yes +
    suicidal_yes + sex_male + race_black + race_hispanic +
    age_at_time_of_death + agency_county,
  data = countyjail_data,
  family = "binomial"
)

# Print coefficients (log-odds scale), standard errors and fit statistics.
summary(model_allcounties)
## 
## Call:
## glm(formula = preventable_death ~ housing_single_cell + mh_yes + 
##     suicidal_yes + sex_male + race_black + race_hispanic + age_at_time_of_death + 
##     agency_county, family = "binomial", data = countyjail_data)
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           2.44867    0.75414   3.247 0.001166 ** 
## housing_single_cell   0.84278    0.34729   2.427 0.015236 *  
## mh_yes               -0.09167    0.50668  -0.181 0.856432    
## suicidal_yes          1.73780    0.63252   2.747 0.006006 ** 
## sex_male             -0.11607    0.47236  -0.246 0.805900    
## race_black           -1.43054    0.41272  -3.466 0.000528 ***
## race_hispanic        -0.40438    0.39111  -1.034 0.301175    
## age_at_time_of_death -0.07453    0.01344  -5.546 2.92e-08 ***
## agency_countyDALLAS  -1.56923    0.55455  -2.830 0.004659 ** 
## agency_countyHARRIS  -1.19106    0.41760  -2.852 0.004343 ** 
## agency_countyTRAVIS  -0.80992    0.51963  -1.559 0.119077    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 348.20  on 389  degrees of freedom
## Residual deviance: 259.63  on 379  degrees of freedom
## AIC: 281.63
## 
## Number of Fisher Scoring iterations: 6
# Odds ratios table for the all-county model: exponentiated coefficients
# with confidence limits and p-values, rounded to three decimals.
# County rows are interpreted relative to the reference county (Bexar).
tidy(model_allcounties, conf.int = TRUE, exponentiate = TRUE) %>%
  select(
    Variable = term,
    `Odds Ratio` = estimate,
    `95% CI Lower` = conf.low,
    `95% CI Upper` = conf.high,
    `P-Value` = p.value
  ) %>%
  knitr::kable(
    caption = "Logit Model: All County Comparisons (Reference = Bexar)",
    digits = 3
  ) %>%
  kable_styling()
Logit Model: All County Comparisons (Reference = Bexar)
Variable Odds Ratio 95% CI Lower 95% CI Upper P-Value
(Intercept) 11.573 2.696 52.853 0.001
housing_single_cell 2.323 1.173 4.604 0.015
mh_yes 0.912 0.321 2.383 0.856
suicidal_yes 5.685 1.673 20.372 0.006
sex_male 0.890 0.364 2.350 0.806
race_black 0.239 0.103 0.524 0.001
race_hispanic 0.667 0.305 1.424 0.301
age_at_time_of_death 0.928 0.903 0.952 0.000
agency_countyDALLAS 0.208 0.064 0.579 0.005
agency_countyHARRIS 0.304 0.131 0.678 0.004
agency_countyTRAVIS 0.445 0.155 1.200 0.119