library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(tidyverse)
library(readxl)
library(psych)
library(knitr)
library(broom)
library(gtsummary)
library(corrplot)
## corrplot 0.95 loaded
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Load the "All_Data" sheet of the capstone workbook and, in the same
# pipeline, add a numeric Bexar indicator: 1 when agency_county is "BEXAR",
# 0 otherwise (a missing agency_county stays NA).
countyjail_data <- read_excel(
  path = "~/Desktop/capstone bexar county jail/excel work sheets and r code/TXJail_DeathsCounties2015_2025 Capstone analysis.xlsx",
  sheet = "All_Data"
) %>%
  mutate(Bexar = as.numeric(agency_county == "BEXAR"))
library(gtsummary)
library(gtsummary)
# Descriptive statistics for the outcome and the model covariates.
# Continuous variables are reported as mean (SD) with two decimals,
# categorical variables as n (%); row labels are printed in bold.
countyjail_data %>%
  select(
    preventable_death,
    housing_single_cell,
    mh_yes,
    suicidal_yes,
    sex_male,
    race_black,
    race_hispanic,
    age_at_time_of_death,
    Bexar
  ) %>%
  tbl_summary(
    label = list(
      preventable_death ~ "Preventable Death",
      housing_single_cell ~ "Single Cell Housing",
      mh_yes ~ "Mental Health Flag",
      suicidal_yes ~ "Suicidal Ideation",
      sex_male ~ "Male",
      race_black ~ "Black",
      race_hispanic ~ "Hispanic",
      age_at_time_of_death ~ "Age",
      Bexar ~ "Bexar County"
    ),
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = list(all_continuous() ~ 2)
  ) %>%
  bold_labels()
| Characteristic | N = 390 |
|---|---|
| Preventable Death | 64 (16%) |
| Single Cell Housing | 99 (25%) |
| Mental Health Flag | 70 (18%) |
| Suicidal Ideation | 28 (7.2%) |
| Male | 346 (89%) |
| Black | 155 (40%) |
| Hispanic | 85 (22%) |
| Age | 46.92 (14.89) |
| Bexar County | 119 (31%) |
library(corrplot)
library(gtsummary)
# Summary table of the analysis variables: mean (SD) to two decimals for
# continuous columns, n (%) for categorical columns, bold row labels.
countyjail_data %>%
  select(all_of(c(
    "preventable_death", "housing_single_cell", "mh_yes", "suicidal_yes",
    "sex_male", "race_black", "race_hispanic", "age_at_time_of_death",
    "Bexar"
  ))) %>%
  tbl_summary(
    digits = list(all_continuous() ~ 2),
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    label = list(
      preventable_death ~ "Preventable Death",
      housing_single_cell ~ "Single Cell Housing",
      mh_yes ~ "Mental Health Flag",
      suicidal_yes ~ "Suicidal Ideation",
      sex_male ~ "Male",
      race_black ~ "Black",
      race_hispanic ~ "Hispanic",
      age_at_time_of_death ~ "Age",
      Bexar ~ "Bexar County"
    )
  ) %>%
  bold_labels()
| Characteristic | N = 390 |
|---|---|
| Preventable Death | 64 (16%) |
| Single Cell Housing | 99 (25%) |
| Mental Health Flag | 70 (18%) |
| Suicidal Ideation | 28 (7.2%) |
| Male | 346 (89%) |
| Black | 155 (40%) |
| Hispanic | 85 (22%) |
| Age | 46.92 (14.89) |
| Bexar County | 119 (31%) |
library(corrplot)
# Keep only the analysis variables and drop every row with a missing value,
# so the correlations are computed on complete cases.
cor_vars <- na.omit(
  select(
    countyjail_data,
    preventable_death, housing_single_cell, mh_yes, suicidal_yes,
    sex_male, race_black, race_hispanic, age_at_time_of_death, Bexar
  )
)

# Pairwise Pearson correlations between all retained columns.
cor_matrix <- cor(cor_vars, method = "pearson")

# Colour-coded upper triangle with the coefficients printed in each cell;
# the top margin is widened so the title is not clipped.
corrplot(
  cor_matrix,
  method = "color",
  type = "upper",
  tl.cex = 0.8,
  addCoef.col = "purple",
  number.cex = 0.7,
  title = "Correlation Matrix - Custodial Death Variables",
  mar = c(0, 0, 1, 0)
)

# Logistic regression of preventable_death on housing, mental-health,
# suicidality and demographic covariates plus the 0/1 Bexar indicator
# (Bexar vs. all other counties in the data).
# family = "binomial" uses R's default logit link, so the estimates printed
# by summary() are on the log-odds scale.
model <- glm(preventable_death ~ housing_single_cell + mh_yes + suicidal_yes +
sex_male + race_black + race_hispanic + age_at_time_of_death + Bexar,
data = countyjail_data,
family = "binomial")
# Print coefficients, standard errors, z tests, deviances and AIC.
summary(model)
##
## Call:
## glm(formula = preventable_death ~ housing_single_cell + mh_yes +
## suicidal_yes + sex_male + race_black + race_hispanic + age_at_time_of_death +
## Bexar, family = "binomial", data = countyjail_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.23862 0.76230 1.625 0.104198
## housing_single_cell 0.94777 0.33268 2.849 0.004388 **
## mh_yes -0.04381 0.48404 -0.091 0.927887
## suicidal_yes 1.70008 0.62316 2.728 0.006369 **
## sex_male -0.09354 0.47275 -0.198 0.843157
## race_black -1.46962 0.40916 -3.592 0.000328 ***
## race_hispanic -0.40401 0.38346 -1.054 0.292063
## age_at_time_of_death -0.07522 0.01332 -5.645 1.65e-08 ***
## Bexar 1.19484 0.35211 3.393 0.000690 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 348.20 on 389 degrees of freedom
## Residual deviance: 261.14 on 381 degrees of freedom
## AIC: 279.14
##
## Number of Fisher Scoring iterations: 6
library(kableExtra)
library(broom)
library(kableExtra)
# Odds-ratio table for `model`.
# exponentiate = TRUE puts the estimates and their confidence bounds on the
# odds-ratio scale; conf.int = TRUE adds the conf.low / conf.high columns.
tidy(model, exponentiate = TRUE, conf.int = TRUE) %>%
  select(term, estimate, conf.low, conf.high, p.value) %>%
  # Rounding to three digits would print very small p-values as "0.000",
  # which reads as an exact zero; report them as "<0.001" instead.
  mutate(
    p.value = if_else(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
  ) %>%
  rename(
    Variable = term,
    `Odds Ratio` = estimate,
    `95% CI Lower` = conf.low,
    `95% CI Upper` = conf.high,
    `P-Value` = p.value
  ) %>%
  # P-Value is now text, so right-align it explicitly to match the numbers.
  kable(
    digits = 3,
    align = c("l", "r", "r", "r", "r"),
    caption = "Logit Model: Odds Ratios for Preventable Death"
  ) %>%
  kable_styling()
Logit Model: Odds Ratios for Preventable Death

| Variable | Odds Ratio | 95% CI Lower | 95% CI Upper | P-Value |
|---|---|---|---|---|
| (Intercept) | 3.451 | 0.775 | 15.652 | 0.104 |
| housing_single_cell | 2.580 | 1.344 | 4.980 | 0.004 |
| mh_yes | 0.957 | 0.352 | 2.389 | 0.928 |
| suicidal_yes | 5.474 | 1.637 | 19.245 | 0.006 |
| sex_male | 0.911 | 0.372 | 2.407 | 0.843 |
| race_black | 0.230 | 0.099 | 0.499 | 0.000 |
| race_hispanic | 0.668 | 0.309 | 1.400 | 0.292 |
| age_at_time_of_death | 0.928 | 0.902 | 0.951 | 0.000 |
| Bexar | 3.303 | 1.669 | 6.676 | 0.001 |
# Set Bexar as reference group.
# relevel() moves "BEXAR" to the first factor level (the baseline used by
# glm) and keeps every other county found in the data as its own level.
# A hard-coded levels= vector would turn any unlisted county into NA, and
# glm would then silently drop those rows. relevel() also stops with an
# error if "BEXAR" is not present, instead of failing quietly.
countyjail_data <- countyjail_data %>%
  mutate(agency_county = relevel(factor(agency_county), ref = "BEXAR"))
# All county logit model
# Same covariates as the Bexar-indicator model, but the county enters as the
# categorical predictor agency_county, giving one coefficient per non-baseline
# county. Assumes agency_county is already a factor whose first level is
# "BEXAR", so each county coefficient is a contrast against Bexar.
model_allcounties <- glm(preventable_death ~ housing_single_cell + mh_yes +
suicidal_yes + sex_male + race_black + race_hispanic +
age_at_time_of_death + agency_county,
data = countyjail_data,
family = "binomial")
# Print coefficients (log-odds scale), standard errors, deviances and AIC.
summary(model_allcounties)
##
## Call:
## glm(formula = preventable_death ~ housing_single_cell + mh_yes +
## suicidal_yes + sex_male + race_black + race_hispanic + age_at_time_of_death +
## agency_county, family = "binomial", data = countyjail_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.44867 0.75414 3.247 0.001166 **
## housing_single_cell 0.84278 0.34729 2.427 0.015236 *
## mh_yes -0.09167 0.50668 -0.181 0.856432
## suicidal_yes 1.73780 0.63252 2.747 0.006006 **
## sex_male -0.11607 0.47236 -0.246 0.805900
## race_black -1.43054 0.41272 -3.466 0.000528 ***
## race_hispanic -0.40438 0.39111 -1.034 0.301175
## age_at_time_of_death -0.07453 0.01344 -5.546 2.92e-08 ***
## agency_countyDALLAS -1.56923 0.55455 -2.830 0.004659 **
## agency_countyHARRIS -1.19106 0.41760 -2.852 0.004343 **
## agency_countyTRAVIS -0.80992 0.51963 -1.559 0.119077
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 348.20 on 389 degrees of freedom
## Residual deviance: 259.63 on 379 degrees of freedom
## AIC: 281.63
##
## Number of Fisher Scoring iterations: 6
# Odds ratios table
# exponentiate = TRUE puts the estimates and their confidence bounds on the
# odds-ratio scale; conf.int = TRUE adds the conf.low / conf.high columns.
tidy(model_allcounties, exponentiate = TRUE, conf.int = TRUE) %>%
  select(term, estimate, conf.low, conf.high, p.value) %>%
  # Rounding to three digits would print very small p-values as "0.000",
  # which reads as an exact zero; report them as "<0.001" instead.
  mutate(
    p.value = if_else(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
  ) %>%
  rename(
    Variable = term,
    `Odds Ratio` = estimate,
    `95% CI Lower` = conf.low,
    `95% CI Upper` = conf.high,
    `P-Value` = p.value
  ) %>%
  # P-Value is now text, so right-align it explicitly to match the numbers.
  knitr::kable(
    digits = 3,
    align = c("l", "r", "r", "r", "r"),
    caption = "Logit Model: All County Comparisons (Reference = Bexar)"
  ) %>%
  kable_styling()
Logit Model: All County Comparisons (Reference = Bexar)

| Variable | Odds Ratio | 95% CI Lower | 95% CI Upper | P-Value |
|---|---|---|---|---|
| (Intercept) | 11.573 | 2.696 | 52.853 | 0.001 |
| housing_single_cell | 2.323 | 1.173 | 4.604 | 0.015 |
| mh_yes | 0.912 | 0.321 | 2.383 | 0.856 |
| suicidal_yes | 5.685 | 1.673 | 20.372 | 0.006 |
| sex_male | 0.890 | 0.364 | 2.350 | 0.806 |
| race_black | 0.239 | 0.103 | 0.524 | 0.001 |
| race_hispanic | 0.667 | 0.305 | 1.424 | 0.301 |
| age_at_time_of_death | 0.928 | 0.903 | 0.952 | 0.000 |
| agency_countyDALLAS | 0.208 | 0.064 | 0.579 | 0.005 |
| agency_countyHARRIS | 0.304 | 0.131 | 0.678 | 0.004 |
| agency_countyTRAVIS | 0.445 | 0.155 | 1.200 | 0.119 |