Texas.Data

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

library(readr)
library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

Texas_Pop <- read.csv("~/Desktop/High_Value_Dataset__July_2024_20241022 (1).csv")
Texas_Release <- read.csv("~/Desktop/Texas_Department_of_Criminal_Justice_Releases_FY_2023_20241022 (1).csv")

#Clean and standardize column names in both datasets
# Load necessary libraries
library(dplyr)
library(janitor)


Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test

library(stringr)

# Clean and standardize Texas_Pop
Texas_Pop <- Texas_Pop %>%
  clean_names() %>%  # Standardize column names
  rename(
    max_sentence_date = maximum_sentence_date  # Example: Rename only if needed
  ) %>%
  mutate(
    offense_category = case_when(
      # Violent offenses
      str_detect(tdcj_offense, regex("assault|murder|manslaughter|robbery|injury|aggravated|stalking|kidnap|terroristic|riot", ignore_case = TRUE)) ~ "Violent",
      
      # Drug offenses
      str_detect(tdcj_offense, regex("drug|possession|poss|cocaine|heroin|meth|marijuana|substance|narcotics|amphetamine|marihuana|methamphetamine|controlled substance", ignore_case = TRUE)) ~ "Drug",
      
      # Sex offenses
      str_detect(tdcj_offense, regex("sex|indecency|child porn|rape|solicit|minors|prostitution|incest|exposure|sexual", ignore_case = TRUE)) ~ "Sex Offenses",
      
      # Property offenses
      str_detect(tdcj_offense, regex("theft|burglary|fraud|arson|tampering|mischief|forgery|embezzlement|credit card|debit card|material|misapplication|cattle|livestock|cargo|vehicle|motor", ignore_case = TRUE)) ~ "Property",
      
      # Public Order offenses
      str_detect(tdcj_offense, regex("dwi|intox|escape|bond|weapon|firearm|smuggling|prohibited|order|unlawful|carry|license|registration|viol|civil|discharge|endangering|child abandonment|unauthorized", ignore_case = TRUE)) ~ "Public Order",
      
      # Other offenses
      TRUE ~ "Other"
    )
  )

# Clean and standardize Texas_Release
Texas_Release <- Texas_Release %>%
  clean_names()  # Standardize column names

#Filter for aging population (55+)
library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ purrr     1.0.2
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(lubridate)
library(dplyr)
library(DT)
library(ggplot2)
Texas.Pop.Geriatric <- Texas_Pop %>%
  filter(age >= 55)
Texas.Pop.Geriatric <- Texas.Pop.Geriatric %>%
  mutate(admission_date = as.Date(sentence_date, format = "%m/%d/%Y"),
         admission_year = year(admission_date),
         birth_year = year(Sys.Date()) - age,
         age_at_admission = admission_year - birth_year)
Texas.Pop.Geriatric <- Texas.Pop.Geriatric %>%
  mutate(current_year = as.numeric(format(Sys.Date(), "%Y")),
         years_served = current_year - admission_year
  )
Texas.Pop.Geriatric.Chart <- Texas.Pop.Geriatric %>%
  group_by(age_at_admission, years_served) %>%
  summarize(total_count = n(), .groups = "drop")
datatable(Texas.Pop.Geriatric.Chart)

ggplot(Texas.Pop.Geriatric, aes(x = age_at_admission, y = years_served, color = age)) +
   geom_point(alpha = 0.2, size = 1) + 
  scale_color_gradient(low = "black", high = "blue") +
  labs(
    title = "Years Served vs. Age at Admission",
    subtitle = "Aging Population in Texas Prisons (55+)",
    x = "Age at Admission",
    y = "Years Served",
    color = "Age"
  )

Warning: Removed 217 rows containing missing values or values outside the scale range
(`geom_point()`).

#Number of emerging adults with 20 year sentence
library(tidyverse)
library(dplyr)
Texas_Pop <- Texas_Pop %>%
  mutate(admission_date = as.Date(sentence_date, format = "%m/%d/%Y"),
         admission_year = year(admission_date),
         current_year = as.numeric(format(Sys.Date(), "%Y")),
         years_served = current_year - admission_year
  )
LPT <- Texas_Pop %>%
  filter(years_served >=20) %>%
  mutate(birth_year = year(Sys.Date()) - age,
    age_at_admission = admission_year - birth_year
    )
EA.LPT <- LPT %>%
  filter(age_at_admission < 25
         )
EA.LPT <- EA.LPT %>%
  mutate(years_served_group = cut(years_served, breaks = seq(20, max(years_served, na.rm = TRUE) + 5, by = 5), include.lowest = TRUE))
EA.LPT.Breakdown <- EA.LPT %>%
  group_by(years_served_group) %>%
  summarise(total_count = n())
datatable(EA.LPT.Breakdown, 
          caption = "Number of Emerging Adults (Under 25) Serving 20+ Years by Years Served Cohorts")

#Average time served by offense, age group, and race (All LPTS)
library(tidyverse)
library(lubridate)
library(dplyr)
library(DT)
library(ggplot2)

LPT <- LPT %>%
  mutate(age_group = ifelse(age <=55, "55 and over", "Under 55"),
         race_group = case_when(
           race == "B" ~"Black",
           race == "H" ~ "Hispanic",
           race == "W" ~ "White",
           race %in% c("A", "I", "O") ~ "Other",
      TRUE ~ "Other"
         )
  )

LPT_LOS <- LPT %>%
  group_by(age_group, race_group) %>%
  summarise(
    average_LOS = mean(years_served, na.rm = TRUE), 
    count = n()
  ) %>%
  ungroup()

`summarise()` has grouped output by 'age_group'. You can override using the
`.groups` argument.

datatable(LPT_LOS, caption = "LPT by Age, Race and Average Years Served ")

ggplot(LPT_LOS, aes(x = average_LOS, y = interaction(age_group, race_group), fill = average_LOS)) +
  geom_tile() +
  labs(
    title = "Heatmap of Average Time Served (Served 20+)",
    x = "Aveerage LOS",
    y = "Age Group and Race",
    fill = "Average LOS (Years)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold")
  )

OffenseLOS <- LPT %>%
  group_by(race_group, offense_category) %>%
  summarise(
    average_LOS = mean(years_served, na.rm = TRUE), 
    count = n()
  ) %>%
  ungroup()

`summarise()` has grouped output by 'race_group'. You can override using the
`.groups` argument.

datatable(OffenseLOS, caption = "LPT by Age, Race, LOS by offense cateogory")

ggplot(OffenseLOS, aes(x = offense_category, y = average_LOS, fill = race_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Average Time Served by Offense and Race Group (Served +20)",
    x = "Offense Category",
    y = "Average LOS",
    fill = "Race Group"
  ) +
  theme_minimal()

#Average time served by offense, age group, and race (Geriatric (55+) LPTs)
LPT.over55 <- LPT %>%
  mutate(age_group = ifelse(age <=55, "55 and over", "Under 55"),
         race_group = case_when(
           race == "B" ~"Black",
           race == "H" ~ "Hispanic",
           race == "W" ~ "White",
           race %in% c("A", "I", "O") ~ "Other",
      TRUE ~ "Other"
         )
  )
LPT.over55 <- LPT.over55 %>%
  filter(age_group == "55 and over")



LPT_LOS.55 <- LPT.over55 %>%
  group_by(age_group, race_group) %>%
  summarise(
    average_LOS = mean(years_served, na.rm = TRUE), 
    count = n()
  ) %>%
  ungroup()

`summarise()` has grouped output by 'age_group'. You can override using the
`.groups` argument.

datatable(LPT_LOS, caption = "LPT by Age, Race and Average Years Served (55 Years old)")

ggplot(LPT_LOS.55, aes(x = average_LOS, y = interaction(age_group, race_group), fill = average_LOS)) +
  geom_tile() +
  labs(
    title = "Heatmap of Average Time Served (55 Years Old, Served 20+)",
    x = "Aveerage LOS",
    y = "Age Group and Race",
    fill = "Average LOS (Years)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold")
  )

OffenseLOS.55 <- LPT.over55 %>%
  group_by(race_group, offense_category) %>%
  summarise(
    average_LOS = mean(years_served, na.rm = TRUE), 
    count = n()
  ) %>%
  ungroup()

`summarise()` has grouped output by 'race_group'. You can override using the
`.groups` argument.

datatable(OffenseLOS, caption = "LPT by Age, Race, LOS by offense cateogory (55 years Old)")

ggplot(OffenseLOS.55, aes(x = offense_category, y = average_LOS, fill = race_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Average Time Served by Offense and Race Group (55 Years Old, Served +20)",
    x = "Offense Category",
    y = "Average LOS",
    fill = "Race Group"
  ) +
  theme_minimal()

# Calculate proportions by age group, and race
long_term_proportions_age <- LPT %>%
  group_by(age_group) %>%
  summarise(count = n()) %>%
  mutate(proportion = count / sum(count))

long_term_proportions_race <- LPT %>%
  group_by(race_group) %>%
  summarise(count = n()) %>%
  mutate(proportion = count / sum(count))

long_term_proportions_age_race <- LPT %>%
  group_by(age_group, race_group) %>%
  summarise(count = n()) %>%
  mutate(proportion = count / sum(count))

`summarise()` has grouped output by 'age_group'. You can override using the
`.groups` argument.

datatable(long_term_proportions_age_race, 
          caption = "Proportion of LPTs Among Aging Population by Race/Ethnicity")

#5-Year Admission Cohort Analysis of LPTs
library(dplyr)
library(DT)

# Ensure admission_year is numeric
LPT <- LPT %>%
  mutate(
    admission_year = as.numeric(admission_year),
    cohort = cut(
      admission_year,
      breaks = seq(1960, 2025, by = 5),
      include.lowest = TRUE
    ),
    admission_age_group = case_when(
      age_at_admission <= 25 ~ "Emerging Adult",
      age_at_admission > 25 ~ "Over 25",
      TRUE ~ NA_character_
    )
  )

# Cohort Analysis for All
cohort_analysis <- LPT %>%
  group_by(cohort, race_group, admission_age_group) %>%
  summarise(
    avg_years_served = mean(years_served, na.rm = TRUE),
    count = n()
  ) %>%
  ungroup()

`summarise()` has grouped output by 'cohort', 'race_group'. You can override
using the `.groups` argument.

# Display DataTable
datatable(cohort_analysis,
          caption = "Average Years Served for People Incarcerated Before/After 25")

# Filter for Over 55 and Repeat Analysis
LPT.over55 <- LPT %>%
  filter(age >= 55) %>%
  group_by(cohort, race_group, admission_age_group) %>%
  summarise(
    avg_years_served = mean(years_served, na.rm = TRUE),
    count = n()
  ) %>%
  ungroup()

`summarise()` has grouped output by 'cohort', 'race_group'. You can override
using the `.groups` argument.

# Display DataTable for Over 55
datatable(LPT.over55,
          caption = "Average Years Served for People Incarcerated Before/After 25, Now Over 55")

LPTViolent <- LPT %>%
  filter(offense_category == "Violent")

LPTViolent <- LPTViolent %>%
  mutate(age_cohort = case_when(
    age_at_admission <25 ~ "Emerging Adult",
    age_at_admission >= 25 & age_at_admission < 35 ~ "25-34 years",
    age_at_admission >= 35 & age_at_admission < 45 ~ "35-44 years",
    age_at_admission >= 45 & age_at_admission < 55 ~ "45-54 years",
    age_at_admission >= 55 ~ "55+ years"
  ))

Violent_LPT_Age_Race <- LPTViolent %>%
  group_by(age_cohort, race_group) %>%
  summarise(count = n()) %>%
  mutate(proportion = count / sum(count))

`summarise()` has grouped output by 'age_cohort'. You can override using the
`.groups` argument.

datatable(Violent_LPT_Age_Race, 
          caption = "Proportion of Violent Offense LPTs by Age Cohort and Race/Ethnicity")