library(readxl)
library(dplyr)
library(ggplot2)
library(knitr)
# df <- read_excel("TXJail_DeathsCounties2015_2025_Capstone_analysis.xlsx")

1 DATASET OVERVIEW

Dataset: TXJail_DeathsCounties2015_2025_Capstone_analysis.xlsx
Study Period: January 1, 2015 - December 31, 2025
Total Observations: N = 390 custodial deaths
Analysis Date: February 2026

1.1 Data Source

Texas Justice Initiative (TJI) custodial death reports, compiled from Texas Attorney General mandatory reporting under the Texas Public Information Act.

Website: https://texasjusticeinitiative.org/datasets/custodial-deaths

1.2 Sample Composition

Deaths in four largest Texas county jails:

County N Deaths % of Sample Preventable Deaths Preventable Rate
Bexar 119 30.5% 33 27.7%
Dallas 77 19.7% 6 7.8%
Harris 151 38.7% 13 8.6%
Travis 43 11.0% 12 27.9%
Total 390 100% 64 16.4%

2 VARIABLE DEFINITIONS

2.1 Dependent Variable

2.1.1 preventable_death

Type: Binary (0/1)

Definition: Deaths from suicide OR drug/alcohol intoxication

Coding:
  • 0 = Non-Preventable (N=326, 83.6%)
  • 1 = Preventable (N=64, 16.4%)

Categories included as Preventable:
  • Suicide: 62 deaths (15.9%)
  • Drug/alcohol intoxication: 2 deaths (0.5%)

Categories coded as Non-Preventable:
  • Natural causes/illness: 213 deaths (54.6%)
  • Homicide: 28 deaths (7.2%)
  • Accidental (non-drug): 36 deaths (9.2%)
  • Other: 49 deaths (12.6%)

R Code:

# Dependent variable: 1 when manner_of_death is suicide or alcohol/drug
# intoxication, 0 for every other value (%in% maps NA to FALSE, hence 0).
df$preventable_death <- as.numeric(
  df$manner_of_death %in% c("SUICIDE", "ALCOHOL OR DRUG INTOXICATION")
)

2.2 Demographic Variables

2.2.1 age_at_time_of_death

  • Type: Continuous
  • Range: 18-89 years
  • Mean (SD): 43.8 (13.6)
  • Median: 42 years
  • Use: Primary age predictor in regression models

R Code:

# Print the numeric summary of age, which is kept continuous for modelling
summary(object = df$age_at_time_of_death)

2.2.2 age_group

  • Type: Categorical
  • Categories: 18-25, 26-35, 36-45, 46-55, 56-65, 66+
  • Highest risk: Ages 26-35 (31.2% of all deaths)

R Code:

# Bin age into the six codebook age bands. Intervals are right-closed, and
# include.lowest = TRUE keeps age 18 inside the first band. The top break is
# Inf (not 100) so the open-ended "66+" band cannot silently produce NA for
# very old decedents; ages below 18 and missing ages still become NA.
df$age_group <- cut(
  df$age_at_time_of_death,
  breaks = c(18, 25, 35, 45, 55, 65, Inf),
  labels = c("18-25", "26-35", "36-45", "46-55", "56-65", "66+"),
  right = TRUE,
  include.lowest = TRUE
)

2.2.3 sex

  • Type: Categorical (MALE, FEMALE)
  • Distribution: Male: 87.9% | Female: 12.1%

2.2.4 race

  • Type: Categorical
  • Categories: WHITE (32.1%), BLACK (23.6%), HISPANIC (39.7%), OTHER (4.6%)

2.3 Binary Demographic Variables (Regression-Ready)

2.3.1 sex_male

  • Coding: 0 = Female (reference), 1 = Male
  • Use: Sex predictor in models
# 1 where sex is "MALE", 0 for any other recorded value; NA stays NA
df$sex_male <- as.numeric(df$sex == "MALE")

2.3.2 race_black

  • Coding: 0 = Not Black, 1 = Black
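  • Reference: White and Other race combined (race_white is not entered in the models, so rows with race_black = 0 and race_hispanic = 0 form the baseline)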
  • Finding: Protective effect (OR = 0.23, 77% lower odds)
# 1 where race is "BLACK", 0 for any other recorded value; NA stays NA
df$race_black <- as.numeric(df$race == "BLACK")

2.3.3 race_hispanic

  • Coding: 0 = Not Hispanic, 1 = Hispanic
  • Finding: No significant difference from reference
# 1 where race is "HISPANIC", 0 for any other recorded value; NA stays NA
df$race_hispanic <- as.numeric(df$race == "HISPANIC")

2.3.4 race_white

  • Coding: 0 = Not White, 1 = White
  • Note: Created for descriptive analysis
# 1 where race is "WHITE", 0 for any other recorded value; NA stays NA
df$race_white <- as.numeric(df$race == "WHITE")

2.4 Institutional Variables

2.4.1 exhibit_any_mental_health_problems

  • Type: Categorical (YES, NO, UNKNOWN)
  • Distribution: YES: 20.8% | NO: 35.9% | UNKNOWN: 43.3%
  • Important: UNKNOWN = screening/documentation failure

2.4.2 make_suicidal_statements

  • Type: Categorical (YES, NO, UNKNOWN)
  • Distribution: YES: 4.6% (only 18 identified!) | NO: 66.9% | UNKNOWN: 28.5%
  • Critical: 62 died by suicide, only 18 were identified at intake

2.4.3 days_from_custody_to_death

  • Type: Continuous
  • Range: 0-18,760 days
  • Mean (SD): 224 (883)
  • Median: 29 days
  • Note: Highly right-skewed distribution

2.5 Binary Institutional Variables (Regression-Ready)

2.5.1 mh_yes

  • Coding: 0 = No MH problems documented (NO or UNKNOWN), 1 = MH problems documented (YES)
  • Distribution: 20.8% documented with MH problems
# 1 where mental health problems are recorded as "YES", 0 for any other
# recorded value; NA stays NA
df$mh_yes <- as.numeric(df$exhibit_any_mental_health_problems == "YES")

2.5.2 mh_unknown

  • Coding: 0 = MH status documented, 1 = UNKNOWN
  • Distribution: 43.3% unknown (screening failure proxy)
  • Use: Proxy for screening quality (higher % = worse screening)
# 1 where mental health status is recorded as "UNKNOWN", 0 for any other
# recorded value; NA stays NA
df$mh_unknown <- as.numeric(df$exhibit_any_mental_health_problems == "UNKNOWN")

2.5.3 suicidal_yes

  • Coding: 0 = No suicidal statements documented (NO or UNKNOWN), 1 = Suicidal statements documented (YES)
  • Distribution: Only 18 cases (4.6%)
  • Finding: Strongest predictor (OR = 5.50, p = .006)
# 1 where suicidal statements are recorded as "YES", 0 for any other
# recorded value; NA stays NA
df$suicidal_yes <- as.numeric(df$make_suicidal_statements == "YES")

2.5.4 housing_single_cell

  • Coding: 0 = Not single cell, 1 = Single cell housing
  • Distribution: 25.4% in single cell
  • Finding: Associated with higher preventable death risk
# 1 where housing_type is exactly "Single Cell", 0 for any other recorded
# value; NA stays NA
df$housing_single_cell <- as.numeric(df$housing_type == "Single Cell")

2.6 County Variables (Reference: Bexar)

Why Bexar is the reference: It has one of the two highest preventable death rates (27.7%, nearly identical to Travis at 27.9%)

2.6.1 county_dallas

  • Coding: 0 = Not Dallas, 1 = Dallas County
  • Preventable Rate: 7.8% (lowest)
  • Model 3 Finding: OR = 0.21 (79% lower odds than Bexar)
# 1 where agency_county is "DALLAS", 0 for any other recorded county
df$county_dallas <- as.numeric(df$agency_county == "DALLAS")

2.6.2 county_harris

  • Coding: 0 = Not Harris, 1 = Harris County
  • Preventable Rate: 8.6%
  • Model 3 Finding: OR = 0.32 (68% lower odds than Bexar)
# 1 where agency_county is "HARRIS", 0 for any other recorded county
df$county_harris <- as.numeric(df$agency_county == "HARRIS")

2.6.3 county_travis

  • Coding: 0 = Not Travis, 1 = Travis County
  • Preventable Rate: 27.9% (similar to Bexar)
  • Model 3 Finding: OR = 0.66 (not significantly different from Bexar)
# 1 where agency_county is "TRAVIS", 0 for any other recorded county
df$county_travis <- as.numeric(df$agency_county == "TRAVIS")

3 ANALYSIS CODE

3.1 Complete Variable Creation

# Outcome ----
# 1 = suicide or alcohol/drug intoxication, 0 = any other manner of death
# (%in% never returns NA, so a missing manner of death is coded 0).
df$preventable_death <- as.numeric(
  df$manner_of_death %in% c("SUICIDE", "ALCOHOL OR DRUG INTOXICATION")
)

# Demographic indicators ----
# Each indicator is 1 on an exact match, 0 otherwise, and NA when the
# source value is NA.
df$sex_male <- as.numeric(df$sex == "MALE")

# Only race_black and race_hispanic enter the regression models, so the
# model baseline is every row where both of those are 0.
df$race_white <- as.numeric(df$race == "WHITE")
df$race_black <- as.numeric(df$race == "BLACK")
df$race_hispanic <- as.numeric(df$race == "HISPANIC")

# Age-band indicators (bounds are inclusive at both ends)
df$age_26_35 <- with(
  df,
  as.numeric(age_at_time_of_death >= 26 & age_at_time_of_death <= 35)
)
df$age_36_45 <- with(
  df,
  as.numeric(age_at_time_of_death >= 36 & age_at_time_of_death <= 45)
)
df$age_46_55 <- with(
  df,
  as.numeric(age_at_time_of_death >= 46 & age_at_time_of_death <= 55)
)

# Institutional indicators ----
# Mental health status: documented problems, and status recorded as unknown
df$mh_yes <- as.numeric(df$exhibit_any_mental_health_problems == "YES")
df$mh_unknown <- as.numeric(df$exhibit_any_mental_health_problems == "UNKNOWN")

# Documented suicidal statements
df$suicidal_yes <- as.numeric(df$make_suicidal_statements == "YES")

# Housing type
df$housing_single_cell <- as.numeric(df$housing_type == "Single Cell")
df$housing_multiple <- as.numeric(df$housing_type == "Multiple Occupancy")

# County indicators ----
# No indicator is created for BEXAR, which makes it the omitted category.
df$county_dallas <- as.numeric(df$agency_county == "DALLAS")
df$county_harris <- as.numeric(df$agency_county == "HARRIS")
df$county_travis <- as.numeric(df$agency_county == "TRAVIS")

3.2 Logistic Regression Models

3.2.1 Model 1: Demographics Only

# Model 1: logistic regression of preventable death on demographics only
# (continuous age, male indicator, Black and Hispanic indicators).
model1 <- glm(
  formula = preventable_death ~ age_at_time_of_death + sex_male +
    race_black + race_hispanic,
  family = binomial,
  data = df
)

# Coefficient table on the log-odds scale
summary(model1)

# Exponentiated coefficients, read as odds ratios
exp(coef(model1))

3.2.2 Model 2: Add Mental Health Variables

# Model 2: Model 1 plus documented mental health problems, documented
# suicidal statements, and days from custody to death.
model2 <- glm(
  formula = preventable_death ~ age_at_time_of_death + sex_male +
    race_black + race_hispanic + mh_yes + suicidal_yes +
    days_from_custody_to_death,
  family = binomial,
  data = df
)

# Coefficient table on the log-odds scale
summary(model2)

3.2.3 Model 3: Full Model with County Effects

# Model 3: Model 2 plus county indicators (Dallas, Harris, Travis). Bexar has
# no indicator in the formula, so it is the omitted county.
model3 <- glm(preventable_death ~ age_at_time_of_death + sex_male +
              race_black + race_hispanic + mh_yes + suicidal_yes +
              days_from_custody_to_death + county_dallas + county_harris +
              county_travis,
              data = df,
              family = binomial)

summary(model3)

# Create odds ratio table.
# confint() on a glm profiles the likelihood, which is the slow step, so it
# is computed once and both interval bounds are taken from the same result.
model3_ci <- exp(confint(model3))

model3_or <- data.frame(
  Variable = names(coef(model3)),
  Odds_Ratio = exp(coef(model3)),
  CI_Lower = model3_ci[, 1],
  CI_Upper = model3_ci[, 2],
  # Select the p-value column by name rather than by position
  P_Value = coef(summary(model3))[, "Pr(>|z|)"]
)

print(model3_or)

4 MODEL 3 RESULTS

4.1 Coefficients and Odds Ratios

Variable Odds Ratio 95% CI Lower 95% CI Upper P-Value Interpretation
(Intercept) 15.762 3.811 70.349 0.000 Baseline odds (Bexar, female, non-Black/non-Hispanic, all other predictors = 0)
age_at_time_of_death 0.927 0.902 0.950 0.000 7.3% lower odds per year ✓
sex_male 0.852 0.352 2.226 0.732 Not significant
race_black 0.229 0.098 0.503 0.000 77% lower odds (protective) ✓
race_hispanic 0.640 0.296 1.349 0.246 Not significant
mh_yes 0.926 0.330 2.391 0.878 Not significant
suicidal_yes 5.498 1.651 19.064 0.006 5.5x higher odds ✓
days_from_custody_to_death 1.000 0.999 1.002 0.691 Not significant
county_dallas 0.209 0.065 0.577 0.004 79% lower odds than Bexar ✓
county_harris 0.317 0.138 0.700 0.005 68% lower odds than Bexar ✓
county_travis 0.659 0.248 1.662 0.388 Not different from Bexar

✓ = Statistically significant (p < .05)


5 HYPOTHESIS TESTING

5.1 H1: Younger Age → Higher Risk

Status: SUPPORTED

  • Age coefficient: β = -0.078, p < .001
  • Odds Ratio: 0.927
  • Interpretation: Each additional year of age associated with 7.3% lower odds of preventable death
  • Ages 26-35 represent highest-risk group (31.2% of preventable deaths)

5.2 H2: White Race → Higher Risk than Black

Status: SUPPORTED

  • Black race coefficient: β = -1.475, p < .001
  • Odds Ratio: 0.229
  • Interpretation: Black inmates have 77% lower odds than the reference category (White and Other race combined, since race_white is not in the model)
  • White inmates: 23.6% preventable death rate vs. Black: 8.7%

5.3 H3: Suicidal Statements → Higher Risk

Status: STRONGLY SUPPORTED

  • Suicidal statements coefficient: β = 1.704, p = .006
  • Odds Ratio: 5.498
  • Interpretation: Inmates who made suicidal statements have 5.5 times the odds of a preventable death
  • Critical caveat: Only 18 of 390 inmates (4.6%) identified; 62 died by suicide

5.4 H4: County Effects Persist After Controls

Status: SUPPORTED

  • Dallas County: OR = 0.21, p = .004 (79% lower odds than Bexar)
  • Harris County: OR = 0.32, p = .005 (68% lower odds than Bexar)
  • Travis County: OR = 0.66, p = .388 (not significantly different)
  • Interpretation: County effects remain after controlling for demographics, mental health, suicidal statements, and days in custody → institutional factors drive outcomes

6 KEY FINDINGS SUMMARY

6.1 The Screening Paradox

Counties ranked by screening quality (% UNKNOWN MH status):

County % MH Unknown Screening Quality Suicide Rate Paradox
Travis 11.6% ★★★★ Best 25.6% ⚠️ High suicide despite best screening
Bexar 33.6% ★★★ Good 27.7% ⚠️ High suicide despite good screening
Dallas 41.6% ★★ Poor 7.8% ✓ Low suicide despite poor screening
Harris 60.9% ★ Worst 7.9% ✓ Low suicide despite worst screening

Conclusion: Documentation quality ≠ Prevention effectiveness

6.2 Dallas County Success Story

Intervention Success Rate:
  • 11 inmates identified as suicidal at intake
  • 0 died by suicide
  • 100% intervention success

Compare to Bexar County:
  • 4 inmates identified as suicidal
  • 3 died by suicide
  • 75% intervention failure

Implication: Post-intake monitoring and intervention > Intake screening


7 MISSING DATA

Variable N Missing % Missing Treatment
Core variables 0 0.0% Complete data
housing_type 2 0.5% Excluded from housing analysis
exhibit_any_mental_health_problems 65 16.7% Coded as “UNKNOWN” category
make_suicidal_statements 65 16.7% Coded as “UNKNOWN” category
type_of_offense 75 19.2% Not used in primary models

Philosophy: “UNKNOWN” mental health status treated as substantively meaningful (screening failure) rather than missing data.


8 DATA CITATION

Primary Data Source:

Texas Justice Initiative. (2025). Custodial deaths in Texas, 2015-2025 [Dataset]. Retrieved from https://texasjusticeinitiative.org/datasets/custodial-deaths

Analysis:

Palma, J. (2026). Preventable deaths in Texas county jails: A comparative analysis of institutional failures (2015-2025) [Master’s capstone]. University of Texas at San Antonio.


9 CONTACT INFORMATION

For questions about this codebook or analysis:

Janice Palma, MPA Candidate
University of Texas at San Antonio

Data source inquiries:

Texas Justice Initiative
Website: https://texasjusticeinitiative.org


Last Updated: February 20, 2026
Version: 2.0 (Error-proof edition)
Document Status: Final


10 APPENDIX: REFERENCE CATEGORIES

Variable Type Reference Category How to Interpret Coefficients
Sex Female (sex_male = 0) Male coefficient = effect of being male vs. female
Race White/Other (race_black = 0 and race_hispanic = 0) Race coefficients = effect vs. White and Other race combined
Age Continuous (no reference) Per-year effect on log-odds
Mental Health NO or UNKNOWN (mh_yes = 0) mh_yes = effect of documented problems vs. no or unknown problems
Suicidal NO or UNKNOWN (suicidal_yes = 0) suicidal_yes = effect of statements vs. no or unknown statements
County BEXAR (all county dummies = 0) County coefficients = effect vs. Bexar County

All binary variables coded 0/1 where 1 = presence of characteristic


END OF CODEBOOK