SECTION 1: LOAD LIBRARIES
library(tidyverse) # Data wrangling and ggplot2 visualization
library(readxl) # Read Excel files
library(psych) # Descriptive statistics
library(knitr) # Table formatting
library(broom) # Tidy model outputs
library(gtsummary) # Summary statistics tables
library(corrplot) # Correlation matrix visualization
library(kableExtra) # Enhanced table styling
SECTION 2: LOAD AND PREPARE DATA
# Load data from Excel
countyjail_data <- read_excel(
"~/Desktop/capstone bexar county jail/excel work sheets and r code/TXJail_DeathsCounties2015_2025 Capstone analysis.xlsx",
sheet = "All_Data")
# Create Bexar binary variable and set county factor levels
countyjail_data <- countyjail_data %>%
mutate(
# Binary indicator: 1 = Bexar County, 0 = all other counties
Bexar = ifelse(agency_county == "BEXAR", 1, 0),
# Set Bexar as reference group for all county comparisons
agency_county = factor(agency_county,
levels = c("BEXAR", "DALLAS", "HARRIS", "TRAVIS"))
)
# Confirm data loaded correctly
nrow(countyjail_data) # Should return 390
## [1] 390
names(countyjail_data) # View all column names
## [1] "record_id"
## [2] "agency_county"
## [3] "death_year"
## [4] "death_date"
## [5] "preventable_death"
## [6] "age_at_time_of_death"
## [7] "age_group"
## [8] "sex"
## [9] "race"
## [10] "sex_male"
## [11] "race_white"
## [12] "race_black"
## [13] "race_hispanic"
## [14] "age_26_35"
## [15] "age_36_45"
## [16] "age_46_55"
## [17] "manner_of_death"
## [18] "specific_type_of_custody_facility"
## [19] "housing_type"
## [20] "days_from_custody_to_death"
## [21] "timing_category"
## [22] "exhibit_any_mental_health_problems"
## [23] "make_suicidal_statements"
## [24] "housing_single_cell"
## [25] "housing_multiple"
## [26] "mh_unknown"
## [27] "mh_yes"
## [28] "suicidal_yes"
## [29] "county_dallas"
## [30] "county_harris"
## [31] "county_travis"
## [32] "death_from_pre_existing_medical_condition"
## [33] "type_of_offense"
## [34] "offense_1"
## [35] "were_the_charges"
## [36] "Bexar"
SECTION 3: CREATE SUMMARY DATASETS
# Four county preventable death summary
four_county_summary <- countyjail_data %>%
filter(agency_county %in% c("BEXAR", "DALLAS", "HARRIS", "TRAVIS")) %>%
group_by(agency_county) %>%
summarise(
total_deaths = n(),
preventable_deaths = sum(preventable_death == 1),
preventable_rate = mean(preventable_death) * 100,
.groups = 'drop'
)
print(four_county_summary)
## # A tibble: 4 × 4
## agency_county total_deaths preventable_deaths preventable_rate
## <fct> <int> <int> <dbl>
## 1 BEXAR 119 33 27.7
## 2 DALLAS 77 6 7.79
## 3 HARRIS 151 13 8.61
## 4 TRAVIS 43 12 27.9
# Characteristics by county
county_characteristics <- countyjail_data %>%
filter(agency_county %in% c("BEXAR", "DALLAS", "HARRIS", "TRAVIS")) %>%
group_by(agency_county) %>%
summarise(
n = n(),
preventable_rate = mean(preventable_death) * 100,
avg_age = mean(age_at_time_of_death, na.rm = TRUE),
male_pct = mean(sex_male) * 100,
black_pct = mean(race_black) * 100,
hispanic_pct = mean(race_hispanic) * 100,
mh_yes_pct = mean(mh_yes) * 100,
suicidal_pct = mean(suicidal_yes) * 100,
single_cell_pct = mean(housing_single_cell) * 100,
avg_days_custody = mean(days_from_custody_to_death, na.rm = TRUE),
.groups = 'drop'
)
print(county_characteristics)
## # A tibble: 4 × 11
## agency_county n preventable_rate avg_age male_pct black_pct hispanic_pct
## <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 BEXAR 119 27.7 45.7 85.7 22.7 38.7
## 2 DALLAS 77 7.79 49.5 90.9 46.8 20.8
## 3 HARRIS 151 8.61 48.2 91.4 54.3 5.96
## 4 TRAVIS 43 27.9 41.5 83.7 23.3 32.6
## # ℹ 4 more variables: mh_yes_pct <dbl>, suicidal_pct <dbl>,
## # single_cell_pct <dbl>, avg_days_custody <dbl>
# Preventable vs non-preventable comparison
preventable_comparison <- countyjail_data %>%
group_by(preventable_death) %>%
summarise(
n = n(),
avg_age = mean(age_at_time_of_death, na.rm = TRUE),
male_pct = mean(sex_male) * 100,
black_pct = mean(race_black) * 100,
hispanic_pct = mean(race_hispanic) * 100,
mh_yes_pct = mean(mh_yes) * 100,
suicidal_pct = mean(suicidal_yes) * 100,
avg_days_custody = mean(days_from_custody_to_death, na.rm = TRUE),
.groups = 'drop'
)
print(preventable_comparison)
## # A tibble: 2 × 9
## preventable_death n avg_age male_pct black_pct hispanic_pct mh_yes_pct
## <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 326 48.9 89.3 43.9 20.2 17.5
## 2 1 64 36.6 85.9 18.8 29.7 20.3
## # ℹ 2 more variables: suicidal_pct <dbl>, avg_days_custody <dbl>
SECTION 4: DESCRIPTIVE STATISTICS
# Overall sample descriptive statistics table
countyjail_data %>%
select(preventable_death, housing_single_cell, mh_yes, suicidal_yes,
sex_male, race_black, race_hispanic, age_at_time_of_death, Bexar) %>%
tbl_summary(
statistic = list(
all_continuous() ~ "{mean} ({sd})",
all_categorical() ~ "{n} ({p}%)"
),
digits = all_continuous() ~ 2,
label = list(
preventable_death ~ "Preventable Death",
housing_single_cell ~ "Single Cell Housing",
mh_yes ~ "Mental Health Flag",
suicidal_yes ~ "Suicidal Ideation",
sex_male ~ "Male",
race_black ~ "Black",
race_hispanic ~ "Hispanic",
age_at_time_of_death ~ "Age",
Bexar ~ "Bexar County"
)
) %>%
bold_labels()
| Characteristic |
N = 390 |
| Preventable Death |
64 (16%) |
| Single Cell Housing |
99 (25%) |
| Mental Health Flag |
70 (18%) |
| Suicidal Ideation |
28 (7.2%) |
| Male |
346 (89%) |
| Black |
155 (40%) |
| Hispanic |
85 (22%) |
| Age |
46.92 (14.89) |
| Bexar County |
119 (31%) |
SECTION 5: BIVARIATE TESTS
# T-test: Age by preventable death status
t_test_age <- t.test(age_at_time_of_death ~ preventable_death,
data = countyjail_data)
print(t_test_age)
##
## Welch Two Sample t-test
##
## data: age_at_time_of_death by preventable_death
## t = 8.0366, df = 120.46, p-value = 7.136e-13
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## 9.266044 15.323872
## sample estimates:
## mean in group 0 mean in group 1
## 48.93558 36.64062
# Age summary by group
age_summary <- countyjail_data %>%
group_by(preventable_death) %>%
summarise(
n = n(),
mean_age = mean(age_at_time_of_death, na.rm = TRUE),
sd_age = sd(age_at_time_of_death, na.rm = TRUE)
)
knitr::kable(age_summary, digits = 2,
caption = "Age by Preventable Death Status")
Age by Preventable Death Status
| 0 |
326 |
48.94 |
14.82 |
| 1 |
64 |
36.64 |
10.33 |
# T-test: Days in custody by preventable death status
t_test_days <- t.test(days_from_custody_to_death ~ preventable_death,
data = countyjail_data)
print(t_test_days)
##
## Welch Two Sample t-test
##
## data: days_from_custody_to_death by preventable_death
## t = 0.68259, df = 80.006, p-value = 0.4968
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -40.96204 83.73179
## sample estimates:
## mean in group 0 mean in group 1
## 123.8067 102.4219
# Chi-square: Mental health by preventable death
mh_table <- table(countyjail_data$exhibit_any_mental_health_problems,
countyjail_data$preventable_death)
chisq_mh <- chisq.test(mh_table)
print(chisq_mh)
##
## Pearson's Chi-squared test
##
## data: mh_table
## X-squared = 5.631, df = 2, p-value = 0.05987
prop.table(mh_table, 1) * 100
##
## 0 1
## NO 77.90698 22.09302
## UNKNOWN 88.75740 11.24260
## YES 81.42857 18.57143
# Chi-square: County by preventable death
county_table <- table(countyjail_data$agency_county,
countyjail_data$preventable_death)
chisq_county <- chisq.test(county_table)
print(chisq_county)
##
## Pearson's Chi-squared test
##
## data: county_table
## X-squared = 26.13, df = 3, p-value = 8.96e-06
# Chi-square: Race by preventable death
race_table <- table(countyjail_data$race,
countyjail_data$preventable_death)
chisq_race <- chisq.test(race_table)
print(chisq_race)
##
## Pearson's Chi-squared test
##
## data: race_table
## X-squared = 18.128, df = 3, p-value = 0.0004139
# ANOVA: Age across counties
anova_age <- aov(age_at_time_of_death ~ agency_county,
data = countyjail_data %>%
filter(agency_county %in% c("BEXAR", "DALLAS", "HARRIS", "TRAVIS")))
summary(anova_age)
## Df Sum Sq Mean Sq F value Pr(>F)
## agency_county 3 2193 731.0 3.358 0.0189 *
## Residuals 386 84038 217.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Post-hoc Tukey test
TukeyHSD(anova_age)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = age_at_time_of_death ~ agency_county, data = countyjail_data %>% filter(agency_county %in% c("BEXAR", "DALLAS", "HARRIS", "TRAVIS")))
##
## $agency_county
## diff lwr upr p adj
## DALLAS-BEXAR 3.825057 -1.743174 9.39328810 0.2882031
## HARRIS-BEXAR 2.496856 -2.170039 7.16374999 0.5123687
## TRAVIS-BEXAR -4.167090 -10.941287 2.60710711 0.3871057
## HARRIS-DALLAS -1.328202 -6.659606 4.00320277 0.9180113
## TRAVIS-DALLAS -7.992147 -15.240162 -0.74413263 0.0240764
## TRAVIS-HARRIS -6.663946 -13.244860 -0.08303202 0.0458887
SECTION 6: CORRELATION MATRIX
# Build correlation matrix for all model variables
cor_vars <- countyjail_data %>%
select(preventable_death, housing_single_cell, mh_yes, suicidal_yes,
sex_male, race_black, race_hispanic, age_at_time_of_death, Bexar) %>%
na.omit()
cor_matrix <- cor(cor_vars, method = "pearson")
# Print rounded correlation values
print(round(cor_matrix, 2))
## preventable_death housing_single_cell mh_yes suicidal_yes
## preventable_death 1.00 0.19 0.03 0.12
## housing_single_cell 0.19 1.00 0.14 0.02
## mh_yes 0.03 0.14 1.00 0.47
## suicidal_yes 0.12 0.02 0.47 1.00
## sex_male -0.04 -0.02 -0.02 0.04
## race_black -0.19 -0.06 0.10 0.04
## race_hispanic 0.08 0.01 -0.09 -0.03
## age_at_time_of_death -0.31 -0.17 -0.01 0.02
## Bexar 0.20 -0.05 -0.21 -0.10
## sex_male race_black race_hispanic age_at_time_of_death
## preventable_death -0.04 -0.19 0.08 -0.31
## housing_single_cell -0.02 -0.06 0.01 -0.17
## mh_yes -0.02 0.10 -0.09 -0.01
## suicidal_yes 0.04 0.04 -0.03 0.02
## sex_male 1.00 0.02 -0.01 0.06
## race_black 0.02 1.00 -0.43 -0.03
## race_hispanic -0.01 -0.43 1.00 -0.07
## age_at_time_of_death 0.06 -0.03 -0.07 1.00
## Bexar -0.06 -0.23 0.27 -0.06
## Bexar
## preventable_death 0.20
## housing_single_cell -0.05
## mh_yes -0.21
## suicidal_yes -0.10
## sex_male -0.06
## race_black -0.23
## race_hispanic 0.27
## age_at_time_of_death -0.06
## Bexar 1.00
# Visual correlation plot
corrplot(cor_matrix,
method = "color",
type = "upper",
tl.cex = 0.8,
addCoef.col = "purple",
number.cex = 0.7,
title = "Correlation Matrix - Custodial Death Variables",
mar = c(0, 0, 1, 0))

SECTION 7: LOGISTIC REGRESSION MODELS
Model 1 — Demographics Only
model1 <- glm(preventable_death ~ age_at_time_of_death + sex_male +
race_black + race_hispanic,
data = countyjail_data,
family = binomial)
summary(model1)
##
## Call:
## glm(formula = preventable_death ~ age_at_time_of_death + sex_male +
## race_black + race_hispanic, family = binomial, data = countyjail_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.21355 0.67731 3.268 0.00108 **
## age_at_time_of_death -0.07489 0.01256 -5.961 2.51e-09 ***
## sex_male -0.15009 0.44079 -0.341 0.73347
## race_black -1.59295 0.39056 -4.079 4.53e-05 ***
## race_hispanic -0.22262 0.35661 -0.624 0.53245
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 348.20 on 389 degrees of freedom
## Residual deviance: 286.98 on 385 degrees of freedom
## AIC: 296.98
##
## Number of Fisher Scoring iterations: 5
# Odds ratios
tidy(model1, exponentiate = TRUE, conf.int = TRUE) %>%
select(term, estimate, conf.low, conf.high, p.value) %>%
rename(Variable = term,
`Odds Ratio` = estimate,
`95% CI Lower` = conf.low,
`95% CI Upper` = conf.high,
`P-Value` = p.value) %>%
kable(digits = 3,
caption = "Model 1: Odds Ratios — Demographics Only") %>%
kable_styling()
Model 1: Odds Ratios — Demographics Only
|
Variable
|
Odds Ratio
|
95% CI Lower
|
95% CI Upper
|
P-Value
|
|
(Intercept)
|
9.148
|
2.456
|
35.526
|
0.001
|
|
age_at_time_of_death
|
0.928
|
0.904
|
0.950
|
0.000
|
|
sex_male
|
0.861
|
0.374
|
2.140
|
0.733
|
|
race_black
|
0.203
|
0.091
|
0.425
|
0.000
|
|
race_hispanic
|
0.800
|
0.392
|
1.595
|
0.532
|
Model 2 — Demographics + Mental Health + Custody Time
model2 <- glm(preventable_death ~ age_at_time_of_death + sex_male +
race_black + race_hispanic + mh_yes + suicidal_yes +
days_from_custody_to_death,
data = countyjail_data,
family = binomial)
summary(model2)
##
## Call:
## glm(formula = preventable_death ~ age_at_time_of_death + sex_male +
## race_black + race_hispanic + mh_yes + suicidal_yes + days_from_custody_to_death,
## family = binomial, data = countyjail_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.335e+00 6.960e-01 3.355 0.000793 ***
## age_at_time_of_death -7.801e-02 1.288e-02 -6.058 1.38e-09 ***
## sex_male -2.530e-01 4.485e-01 -0.564 0.572647
## race_black -1.661e+00 4.011e-01 -4.142 3.44e-05 ***
## race_hispanic -2.050e-01 3.627e-01 -0.565 0.571903
## mh_yes -2.854e-01 4.698e-01 -0.607 0.543523
## suicidal_yes 1.648e+00 6.054e-01 2.722 0.006493 **
## days_from_custody_to_death 7.565e-05 8.169e-04 0.093 0.926215
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 348.20 on 389 degrees of freedom
## Residual deviance: 278.95 on 382 degrees of freedom
## AIC: 294.95
##
## Number of Fisher Scoring iterations: 5
# Odds ratios
tidy(model2, exponentiate = TRUE, conf.int = TRUE) %>%
select(term, estimate, conf.low, conf.high, p.value) %>%
rename(Variable = term,
`Odds Ratio` = estimate,
`95% CI Lower` = conf.low,
`95% CI Upper` = conf.high,
`P-Value` = p.value) %>%
kable(digits = 3,
caption = "Model 2: Odds Ratios — Demographics + Mental Health") %>%
kable_styling()
Model 2: Odds Ratios — Demographics + Mental Health
|
Variable
|
Odds Ratio
|
95% CI Lower
|
95% CI Upper
|
P-Value
|
|
(Intercept)
|
10.330
|
2.683
|
41.805
|
0.001
|
|
age_at_time_of_death
|
0.925
|
0.901
|
0.948
|
0.000
|
|
sex_male
|
0.776
|
0.332
|
1.954
|
0.573
|
|
race_black
|
0.190
|
0.083
|
0.405
|
0.000
|
|
race_hispanic
|
0.815
|
0.394
|
1.644
|
0.572
|
|
mh_yes
|
0.752
|
0.282
|
1.811
|
0.544
|
|
suicidal_yes
|
5.195
|
1.592
|
17.464
|
0.006
|
|
days_from_custody_to_death
|
1.000
|
0.998
|
1.002
|
0.926
|
Model 3 — Primary Model (Professor’s Specification): Bexar
Binary
# This is the main model as specified by the capstone advisor
model <- glm(preventable_death ~ housing_single_cell + mh_yes + suicidal_yes +
sex_male + race_black + race_hispanic + age_at_time_of_death + Bexar,
data = countyjail_data,
family = "binomial")
summary(model)
##
## Call:
## glm(formula = preventable_death ~ housing_single_cell + mh_yes +
## suicidal_yes + sex_male + race_black + race_hispanic + age_at_time_of_death +
## Bexar, family = "binomial", data = countyjail_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.23862 0.76230 1.625 0.104198
## housing_single_cell 0.94777 0.33268 2.849 0.004388 **
## mh_yes -0.04381 0.48404 -0.091 0.927887
## suicidal_yes 1.70008 0.62316 2.728 0.006369 **
## sex_male -0.09354 0.47275 -0.198 0.843157
## race_black -1.46962 0.40916 -3.592 0.000328 ***
## race_hispanic -0.40401 0.38346 -1.054 0.292063
## age_at_time_of_death -0.07522 0.01332 -5.645 1.65e-08 ***
## Bexar 1.19484 0.35211 3.393 0.000690 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 348.20 on 389 degrees of freedom
## Residual deviance: 261.14 on 381 degrees of freedom
## AIC: 279.14
##
## Number of Fisher Scoring iterations: 6
# Odds ratios table
tidy(model, exponentiate = TRUE, conf.int = TRUE) %>%
select(term, estimate, conf.low, conf.high, p.value) %>%
rename(Variable = term,
`Odds Ratio` = estimate,
`95% CI Lower` = conf.low,
`95% CI Upper` = conf.high,
`P-Value` = p.value) %>%
kable(digits = 3,
caption = "Table 1: Logit Model Odds Ratios for Preventable Death (Bexar Binary)") %>%
kable_styling()
Table 1: Logit Model Odds Ratios for Preventable Death (Bexar Binary)
|
Variable
|
Odds Ratio
|
95% CI Lower
|
95% CI Upper
|
P-Value
|
|
(Intercept)
|
3.451
|
0.775
|
15.652
|
0.104
|
|
housing_single_cell
|
2.580
|
1.344
|
4.980
|
0.004
|
|
mh_yes
|
0.957
|
0.352
|
2.389
|
0.928
|
|
suicidal_yes
|
5.474
|
1.637
|
19.245
|
0.006
|
|
sex_male
|
0.911
|
0.372
|
2.407
|
0.843
|
|
race_black
|
0.230
|
0.099
|
0.499
|
0.000
|
|
race_hispanic
|
0.668
|
0.309
|
1.400
|
0.292
|
|
age_at_time_of_death
|
0.928
|
0.902
|
0.951
|
0.000
|
|
Bexar
|
3.303
|
1.669
|
6.676
|
0.001
|
Model 4 — All County Comparison (Reference = Bexar)
# Supplemental model comparing all counties to Bexar as reference group
model_allcounties <- glm(preventable_death ~ housing_single_cell + mh_yes +
suicidal_yes + sex_male + race_black + race_hispanic +
age_at_time_of_death + agency_county,
data = countyjail_data,
family = "binomial")
summary(model_allcounties)
##
## Call:
## glm(formula = preventable_death ~ housing_single_cell + mh_yes +
## suicidal_yes + sex_male + race_black + race_hispanic + age_at_time_of_death +
## agency_county, family = "binomial", data = countyjail_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.44867 0.75414 3.247 0.001166 **
## housing_single_cell 0.84278 0.34729 2.427 0.015236 *
## mh_yes -0.09167 0.50668 -0.181 0.856432
## suicidal_yes 1.73780 0.63252 2.747 0.006006 **
## sex_male -0.11607 0.47236 -0.246 0.805900
## race_black -1.43054 0.41272 -3.466 0.000528 ***
## race_hispanic -0.40438 0.39111 -1.034 0.301175
## age_at_time_of_death -0.07453 0.01344 -5.546 2.92e-08 ***
## agency_countyDALLAS -1.56923 0.55455 -2.830 0.004659 **
## agency_countyHARRIS -1.19106 0.41760 -2.852 0.004343 **
## agency_countyTRAVIS -0.80992 0.51963 -1.559 0.119077
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 348.20 on 389 degrees of freedom
## Residual deviance: 259.63 on 379 degrees of freedom
## AIC: 281.63
##
## Number of Fisher Scoring iterations: 6
# Odds ratios table
tidy(model_allcounties, exponentiate = TRUE, conf.int = TRUE) %>%
select(term, estimate, conf.low, conf.high, p.value) %>%
rename(Variable = term,
`Odds Ratio` = estimate,
`95% CI Lower` = conf.low,
`95% CI Upper` = conf.high,
`P-Value` = p.value) %>%
knitr::kable(digits = 3,
caption = "Table 2: Logit Model — All County Comparisons (Reference = Bexar)") %>%
kable_styling()
Table 2: Logit Model — All County Comparisons (Reference = Bexar)
|
Variable
|
Odds Ratio
|
95% CI Lower
|
95% CI Upper
|
P-Value
|
|
(Intercept)
|
11.573
|
2.696
|
52.853
|
0.001
|
|
housing_single_cell
|
2.323
|
1.173
|
4.604
|
0.015
|
|
mh_yes
|
0.912
|
0.321
|
2.383
|
0.856
|
|
suicidal_yes
|
5.685
|
1.673
|
20.372
|
0.006
|
|
sex_male
|
0.890
|
0.364
|
2.350
|
0.806
|
|
race_black
|
0.239
|
0.103
|
0.524
|
0.001
|
|
race_hispanic
|
0.667
|
0.305
|
1.424
|
0.301
|
|
age_at_time_of_death
|
0.928
|
0.903
|
0.952
|
0.000
|
|
agency_countyDALLAS
|
0.208
|
0.064
|
0.579
|
0.005
|
|
agency_countyHARRIS
|
0.304
|
0.131
|
0.678
|
0.004
|
|
agency_countyTRAVIS
|
0.445
|
0.155
|
1.200
|
0.119
|
Model Fit Comparison
# AIC comparison — lower AIC = better fit
cat("Model 1 AIC (Demographics Only):", AIC(model1), "\n")
## Model 1 AIC (Demographics Only): 296.981
cat("Model 2 AIC (+ Mental Health):", AIC(model2), "\n")
## Model 2 AIC (+ Mental Health): 294.9452
cat("Model 3 AIC (Primary - Bexar Binary):", AIC(model), "\n")
## Model 3 AIC (Primary - Bexar Binary): 279.144
cat("Model 4 AIC (All Counties):", AIC(model_allcounties), "\n")
## Model 4 AIC (All Counties): 281.6312
# McFadden's Pseudo R-squared
cat("\nMcFadden's Pseudo R-squared:\n")
##
## McFadden's Pseudo R-squared:
cat("Model 1:", round(1 - (model1$deviance / model1$null.deviance), 4), "\n")
## Model 1: 0.1758
cat("Model 2:", round(1 - (model2$deviance / model2$null.deviance), 4), "\n")
## Model 2: 0.1989
cat("Model 3:", round(1 - (model$deviance / model$null.deviance), 4), "\n")
## Model 3: 0.25
cat("Model 4:", round(1 - (model_allcounties$deviance / model_allcounties$null.deviance), 4), "\n")
## Model 4: 0.2544
SECTION 8: VISUALIZATIONS
SECTION 9: KEY RESULTS REFERENCE
cat("=== KEY FINDINGS REFERENCE ===\n\n")
## === KEY FINDINGS REFERENCE ===
cat("SAMPLE:\n")
## SAMPLE:
cat("Total deaths:", nrow(countyjail_data), "\n")
## Total deaths: 390
cat("Preventable deaths: 64 (16.4%)\n\n")
## Preventable deaths: 64 (16.4%)
cat("COUNTY PREVENTABLE DEATH RATES:\n")
## COUNTY PREVENTABLE DEATH RATES:
print(four_county_summary)
## # A tibble: 4 × 4
## agency_county total_deaths preventable_deaths preventable_rate
## <fct> <int> <int> <dbl>
## 1 BEXAR 119 33 27.7
## 2 DALLAS 77 6 7.79
## 3 HARRIS 151 13 8.61
## 4 TRAVIS 43 12 27.9
cat("\nPRIMARY MODEL — SIGNIFICANT PREDICTORS:\n")
##
## PRIMARY MODEL — SIGNIFICANT PREDICTORS:
cat("Bexar County: OR = 3.303, 95% CI [1.669, 6.676], p = .001\n")
## Bexar County: OR = 3.303, 95% CI [1.669, 6.676], p = .001
cat("Suicidal Ideation: OR = 5.474, 95% CI [1.637, 19.245], p = .006\n")
## Suicidal Ideation: OR = 5.474, 95% CI [1.637, 19.245], p = .006
cat("Single Cell Housing: OR = 2.580, 95% CI [1.344, 4.980], p = .004\n")
## Single Cell Housing: OR = 2.580, 95% CI [1.344, 4.980], p = .004
cat("Age (per year): OR = 0.928, 95% CI [0.902, 0.951], p < .001\n")
## Age (per year): OR = 0.928, 95% CI [0.902, 0.951], p < .001
cat("Race (Black): OR = 0.230, 95% CI [0.099, 0.499], p < .001\n\n")
## Race (Black): OR = 0.230, 95% CI [0.099, 0.499], p < .001
cat("ALL COUNTY MODEL — COUNTY COMPARISONS VS BEXAR:\n")
## ALL COUNTY MODEL — COUNTY COMPARISONS VS BEXAR:
cat("Dallas vs Bexar: OR = 0.208, 95% CI [0.064, 0.579], p = .005\n")
## Dallas vs Bexar: OR = 0.208, 95% CI [0.064, 0.579], p = .005
cat("Harris vs Bexar: OR = 0.304, 95% CI [0.131, 0.678], p = .004\n")
## Harris vs Bexar: OR = 0.304, 95% CI [0.131, 0.678], p = .004
cat("Travis vs Bexar: OR = 0.445, 95% CI [0.155, 1.200], p = .119 (ns)\n\n")
## Travis vs Bexar: OR = 0.445, 95% CI [0.155, 1.200], p = .119 (ns)
cat("MODEL FIT:\n")
## MODEL FIT:
cat("Primary Model AIC:", round(AIC(model), 2), "\n")
## Primary Model AIC: 279.14
cat("All County Model AIC:", round(AIC(model_allcounties), 2), "\n")
## All County Model AIC: 281.63
cat("Primary Model McFadden R2:",
round(1 - (model$deviance / model$null.deviance), 4), "\n")
## Primary Model McFadden R2: 0.25