logisticregressionanaysis-.knit

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readxl)
library(psych)

## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

library(tidyverse)
library(readxl)
library(psych)
library(knitr)
library(broom)
library(gtsummary)
library(corrplot)

## corrplot 0.95 loaded

library(kableExtra)

## 
## Attaching package: 'kableExtra'

## The following object is masked from 'package:dplyr':
## 
##     group_rows

# Read the "All_Data" sheet of the capstone workbook and, in the same
# pipeline, add `Bexar`: a numeric 0/1 indicator that is 1 when
# `agency_county` equals "BEXAR" (NA stays NA).
countyjail_data <- read_excel(
  path = "~/Desktop/capstone bexar county jail/excel work sheets and r code/TXJail_DeathsCounties2015_2025 Capstone analysis.xlsx",
  sheet = "All_Data"
) |>
  mutate(Bexar = as.numeric(agency_county == "BEXAR"))

library(gtsummary)

library(gtsummary)

# Descriptive statistics for the outcome and every model predictor.
# Continuous variables are reported as mean (SD) with two decimals,
# categorical variables as n (%); row labels are printed in bold.
countyjail_data |>
  select(
    preventable_death, housing_single_cell, mh_yes, suicidal_yes,
    sex_male, race_black, race_hispanic, age_at_time_of_death, Bexar
  ) |>
  tbl_summary(
    label = list(
      preventable_death = "Preventable Death",
      housing_single_cell = "Single Cell Housing",
      mh_yes = "Mental Health Flag",
      suicidal_yes = "Suicidal Ideation",
      sex_male = "Male",
      race_black = "Black",
      race_hispanic = "Hispanic",
      age_at_time_of_death = "Age",
      Bexar = "Bexar County"
    ),
    statistic = list(
      all_categorical() ~ "{n} ({p}%)",
      all_continuous() ~ "{mean} ({sd})"
    ),
    digits = all_continuous() ~ 2
  ) |>
  bold_labels()

Characteristic	N = 390¹
Preventable Death	64 (16%)
Single Cell Housing	99 (25%)
Mental Health Flag	70 (18%)
Suicidal Ideation	28 (7.2%)
Male	346 (89%)
Black	155 (40%)
Hispanic	85 (22%)
Age	46.92 (14.89)
Bexar County	119 (31%)
¹ n (%); Mean (SD)

library(corrplot)

library(gtsummary)

# Summary table of the outcome and predictors: mean (SD) to two decimals
# for continuous variables, n (%) for categorical ones, bold row labels.
# NOTE(review): this chunk repeats the summary-table chunk earlier in the
# document, so the same table is rendered twice -- confirm whether both
# copies are wanted.
bold_labels(
  tbl_summary(
    select(
      countyjail_data,
      preventable_death, housing_single_cell, mh_yes, suicidal_yes,
      sex_male, race_black, race_hispanic, age_at_time_of_death, Bexar
    ),
    digits = all_continuous() ~ 2,
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    label = list(
      Bexar ~ "Bexar County",
      age_at_time_of_death ~ "Age",
      race_hispanic ~ "Hispanic",
      race_black ~ "Black",
      sex_male ~ "Male",
      suicidal_yes ~ "Suicidal Ideation",
      mh_yes ~ "Mental Health Flag",
      housing_single_cell ~ "Single Cell Housing",
      preventable_death ~ "Preventable Death"
    )
  )
)

Characteristic	N = 390¹
Preventable Death	64 (16%)
Single Cell Housing	99 (25%)
Mental Health Flag	70 (18%)
Suicidal Ideation	28 (7.2%)
Male	346 (89%)
Black	155 (40%)
Hispanic	85 (22%)
Age	46.92 (14.89)
Bexar County	119 (31%)
¹ n (%); Mean (SD)

library(corrplot)

# Keep only the analysis variables and drop every row that has a missing
# value in any of them, so all correlations use the same set of rows.
cor_vars <- na.omit(
  select(
    countyjail_data,
    preventable_death, housing_single_cell, mh_yes, suicidal_yes,
    sex_male, race_black, race_hispanic, age_at_time_of_death, Bexar
  )
)

# Pairwise Pearson correlations between the selected variables.
cor_matrix <- cor(cor_vars, method = "pearson")

# Colour-coded plot of the upper triangle with the coefficients overlaid.
corrplot(
  cor_matrix,
  method = "color",
  type = "upper",
  addCoef.col = "purple",
  number.cex = 0.7,
  tl.cex = 0.8,
  mar = c(0, 0, 1, 0),
  title = "Correlation Matrix - Custodial Death Variables"
)

# Logistic regression (binomial glm) of preventable_death on housing,
# mental-health, suicidality, sex, race and age predictors plus `Bexar`,
# the 0/1 indicator for agency_county == "BEXAR" created after the data load.
# The call is left exactly as written because summary() echoes it verbatim.
model <- glm(preventable_death ~ housing_single_cell + mh_yes + suicidal_yes +
               sex_male + race_black + race_hispanic + age_at_time_of_death + Bexar,
             data = countyjail_data,
             family = "binomial")

# Print coefficients on the log-odds scale with standard errors, z tests,
# deviances and AIC.
summary(model)

## 
## Call:
## glm(formula = preventable_death ~ housing_single_cell + mh_yes + 
##     suicidal_yes + sex_male + race_black + race_hispanic + age_at_time_of_death + 
##     Bexar, family = "binomial", data = countyjail_data)
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           1.23862    0.76230   1.625 0.104198    
## housing_single_cell   0.94777    0.33268   2.849 0.004388 ** 
## mh_yes               -0.04381    0.48404  -0.091 0.927887    
## suicidal_yes          1.70008    0.62316   2.728 0.006369 ** 
## sex_male             -0.09354    0.47275  -0.198 0.843157    
## race_black           -1.46962    0.40916  -3.592 0.000328 ***
## race_hispanic        -0.40401    0.38346  -1.054 0.292063    
## age_at_time_of_death -0.07522    0.01332  -5.645 1.65e-08 ***
## Bexar                 1.19484    0.35211   3.393 0.000690 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 348.20  on 389  degrees of freedom
## Residual deviance: 261.14  on 381  degrees of freedom
## AIC: 279.14
## 
## Number of Fisher Scoring iterations: 6

library(kableExtra)

library(broom)
library(kableExtra)

# Odds-ratio table for `model`: exponentiated coefficients with their 95%
# confidence limits and p-values, rendered with kable/kableExtra.
tidy(model, exponentiate = TRUE, conf.int = TRUE) %>%
  select(term, estimate, conf.low, conf.high, p.value) %>%
  mutate(
    # Rounding to three digits would print p-values below 0.001 as "0.000",
    # which reads as an exact zero; report those as "<0.001" instead.
    p.value = ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
  ) %>%
  rename(
    Variable       = term,
    `Odds Ratio`   = estimate,
    `95% CI Lower` = conf.low,
    `95% CI Upper` = conf.high,
    `P-Value`      = p.value
  ) %>%
  # `P-Value` is now character, so right-align it explicitly to keep it in
  # line with the numeric columns; `digits` still rounds the numeric ones.
  kable(digits = 3, align = "lrrrr",
        caption = "Logit Model: Odds Ratios for Preventable Death") %>%
  kable_styling()

Logit Model: Odds Ratios for Preventable Death
Variable	Odds Ratio	95% CI Lower	95% CI Upper	P-Value
(Intercept)	3.451	0.775	15.652	0.104
housing_single_cell	2.580	1.344	4.980	0.004
mh_yes	0.957	0.352	2.389	0.928
suicidal_yes	5.474	1.637	19.245	0.006
sex_male	0.911	0.372	2.407	0.843
race_black	0.230	0.099	0.499	0.000
race_hispanic	0.668	0.309	1.400	0.292
age_at_time_of_death	0.928	0.902	0.951	0.000
Bexar	3.303	1.669	6.676	0.001

# Set Bexar as reference group.
# relevel() only moves "BEXAR" to the first factor level and keeps every
# other county present in the data. A hard-coded level list would silently
# turn any county missing from the list into NA, and glm() would then drop
# those rows under its default NA handling. relevel() also fails loudly if
# "BEXAR" is not present at all.
countyjail_data <- countyjail_data %>%
  mutate(agency_county = relevel(factor(agency_county), ref = "BEXAR"))

# All county logit model
# Same outcome and covariates as the Bexar-indicator model, but with the
# `agency_county` factor in place of `Bexar`, so each non-reference county
# gets its own coefficient relative to the factor's first level.
# The call is left exactly as written because summary() echoes it verbatim.
model_allcounties <- glm(preventable_death ~ housing_single_cell + mh_yes + 
                           suicidal_yes + sex_male + race_black + race_hispanic + 
                           age_at_time_of_death + agency_county,
                         data = countyjail_data,
                         family = "binomial")

# Print coefficients on the log-odds scale with standard errors, z tests,
# deviances and AIC.
summary(model_allcounties)

## 
## Call:
## glm(formula = preventable_death ~ housing_single_cell + mh_yes + 
##     suicidal_yes + sex_male + race_black + race_hispanic + age_at_time_of_death + 
##     agency_county, family = "binomial", data = countyjail_data)
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           2.44867    0.75414   3.247 0.001166 ** 
## housing_single_cell   0.84278    0.34729   2.427 0.015236 *  
## mh_yes               -0.09167    0.50668  -0.181 0.856432    
## suicidal_yes          1.73780    0.63252   2.747 0.006006 ** 
## sex_male             -0.11607    0.47236  -0.246 0.805900    
## race_black           -1.43054    0.41272  -3.466 0.000528 ***
## race_hispanic        -0.40438    0.39111  -1.034 0.301175    
## age_at_time_of_death -0.07453    0.01344  -5.546 2.92e-08 ***
## agency_countyDALLAS  -1.56923    0.55455  -2.830 0.004659 ** 
## agency_countyHARRIS  -1.19106    0.41760  -2.852 0.004343 ** 
## agency_countyTRAVIS  -0.80992    0.51963  -1.559 0.119077    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 348.20  on 389  degrees of freedom
## Residual deviance: 259.63  on 379  degrees of freedom
## AIC: 281.63
## 
## Number of Fisher Scoring iterations: 6

# Odds ratios table
# Exponentiated coefficients of `model_allcounties` with their 95%
# confidence limits and p-values; county rows are relative to the
# reference level of `agency_county`.
tidy(model_allcounties, exponentiate = TRUE, conf.int = TRUE) %>%
  select(term, estimate, conf.low, conf.high, p.value) %>%
  mutate(
    # Rounding to three digits would print p-values below 0.001 as "0.000",
    # which reads as an exact zero; report those as "<0.001" instead.
    p.value = ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
  ) %>%
  rename(
    Variable       = term,
    `Odds Ratio`   = estimate,
    `95% CI Lower` = conf.low,
    `95% CI Upper` = conf.high,
    `P-Value`      = p.value
  ) %>%
  # `P-Value` is now character, so right-align it explicitly to keep it in
  # line with the numeric columns; `digits` still rounds the numeric ones.
  knitr::kable(digits = 3, align = "lrrrr",
               caption = "Logit Model: All County Comparisons (Reference = Bexar)") %>%
  kable_styling()

Logit Model: All County Comparisons (Reference = Bexar)
Variable	Odds Ratio	95% CI Lower	95% CI Upper	P-Value
(Intercept)	11.573	2.696	52.853	0.001
housing_single_cell	2.323	1.173	4.604	0.015
mh_yes	0.912	0.321	2.383	0.856
suicidal_yes	5.685	1.673	20.372	0.006
sex_male	0.890	0.364	2.350	0.806
race_black	0.239	0.103	0.524	0.001
race_hispanic	0.667	0.305	1.424	0.301
age_at_time_of_death	0.928	0.903	0.952	0.000
agency_countyDALLAS	0.208	0.064	0.579	0.005
agency_countyHARRIS	0.304	0.131	0.678	0.004
agency_countyTRAVIS	0.445	0.155	1.200	0.119