Illinois

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

library(readxl)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
library(ggplot2)
library(DT)

# Load data ----
# Every yearly IDOC prison-stock workbook lives in the same folder. Keeping the
# folder in one constant means a relocated project needs a single edit instead
# of twenty.
illinois_data_dir <- file.path(
  "~/Library/CloudStorage/Box-Box/Company/Working files",
  "Sticker Shock - Aging in Prison/03 Data/Illinois"
)

# Read one workbook from the Illinois data folder.
read_illinois_pop <- function(file_name) {
  read_excel(file.path(illinois_data_dir, file_name))
}

# Note: judging by the file names, 2005 is a June extract while every later
# year is a December extract.
Illinois_Pop_2005 <- read_illinois_pop("june-2005-stock-pop.xls")
Illinois_Pop_2006 <- read_illinois_pop("december-2006-stock-pop.xls")
Illinois_Pop_2007 <- read_illinois_pop("december-2007-stock-pop.xls")
Illinois_Pop_2008 <- read_illinois_pop("december-2008-stock-pop.xls")
Illinois_Pop_2009 <- read_illinois_pop("december-2009-stock-pop.xls")
Illinois_Pop_2010 <- read_illinois_pop("dec-2010-stock-pop.xls")
Illinois_Pop_2011 <- read_illinois_pop("dec-2011-prison-stock-pop.xls")
Illinois_Pop_2012 <- read_illinois_pop(
  "dec-2012-prison-stock-internet-variables.xls"
)
Illinois_Pop_2013 <- read_illinois_pop(
  "dec-2013-prison-stock-internet-variables.xls"
)
Illinois_Pop_2014 <- read_illinois_pop(
  "dec-2014-prison-stock-internet-variables.xls"
)
Illinois_Pop_2015 <- read_illinois_pop(
  "internet-data-set-prison-stock-dec-2015.xls"
)
Illinois_Pop_2016 <- read_illinois_pop(
  "internet-data-set-prison-stock-dec-2016.xls"
)
Illinois_Pop_2017 <- read_illinois_pop(
  "internet-data-set-prison-stock-dec-2017.xls"
)
Illinois_Pop_2018 <- read_illinois_pop("december-2018-prison-stock.xls")
Illinois_Pop_2019 <- read_illinois_pop("dec-31-2019-prison-stock-pop.xls")
Illinois_Pop_2020 <- read_illinois_pop("dec-2020-prison-stock.xls")
Illinois_Pop_2021 <- read_illinois_pop("december-2021-prison-stock.xls")
Illinois_Pop_2022 <- read_illinois_pop("Dec-2022-Prison-Population-Data-Set.xls")
Illinois_Pop_2023 <- read_illinois_pop("December-2023-Prison.xls")
Illinois_Pop_2024 <- read_illinois_pop("December-2024-Prison (1).xls")

# Collect the yearly snapshots in a list keyed by cohort year and stamp each
# copy with a `Year` column.
# NOTE(review): `df_list` is not referenced again in the visible part of this
# document; later steps work on the individual Illinois_Pop_* objects.
years <- 2005:2024

df_list <- setNames(
  list(
    Illinois_Pop_2005, Illinois_Pop_2006, Illinois_Pop_2007,
    Illinois_Pop_2008, Illinois_Pop_2009, Illinois_Pop_2010,
    Illinois_Pop_2011, Illinois_Pop_2012, Illinois_Pop_2013,
    Illinois_Pop_2014, Illinois_Pop_2015, Illinois_Pop_2016,
    Illinois_Pop_2017, Illinois_Pop_2018, Illinois_Pop_2019,
    Illinois_Pop_2020, Illinois_Pop_2021, Illinois_Pop_2022,
    Illinois_Pop_2023, Illinois_Pop_2024
  ),
  years
)

# Map() keeps the list names and pairs each snapshot with its year.
df_list <- Map(
  function(snapshot, cohort_year) {
    snapshot$Year <- cohort_year
    snapshot
  },
  df_list,
  years
)
library(dplyr)
# Normalise column names ----
# Lower-case every column name and replace each run of whitespace with a single
# underscore so the same field can be addressed identically in every year.
normalize_colnames <- function(df) {
  colnames(df) <- gsub("\\s+", "_", tolower(colnames(df)))
  df
}

Illinois_Pop_2005 <- normalize_colnames(Illinois_Pop_2005)
Illinois_Pop_2006 <- normalize_colnames(Illinois_Pop_2006)
Illinois_Pop_2007 <- normalize_colnames(Illinois_Pop_2007)
Illinois_Pop_2008 <- normalize_colnames(Illinois_Pop_2008)
Illinois_Pop_2009 <- normalize_colnames(Illinois_Pop_2009)
Illinois_Pop_2010 <- normalize_colnames(Illinois_Pop_2010)
Illinois_Pop_2011 <- normalize_colnames(Illinois_Pop_2011)
Illinois_Pop_2012 <- normalize_colnames(Illinois_Pop_2012)
Illinois_Pop_2013 <- normalize_colnames(Illinois_Pop_2013)
Illinois_Pop_2014 <- normalize_colnames(Illinois_Pop_2014)
Illinois_Pop_2015 <- normalize_colnames(Illinois_Pop_2015)
Illinois_Pop_2016 <- normalize_colnames(Illinois_Pop_2016)
Illinois_Pop_2017 <- normalize_colnames(Illinois_Pop_2017)
Illinois_Pop_2018 <- normalize_colnames(Illinois_Pop_2018)
Illinois_Pop_2019 <- normalize_colnames(Illinois_Pop_2019)
Illinois_Pop_2020 <- normalize_colnames(Illinois_Pop_2020)
Illinois_Pop_2021 <- normalize_colnames(Illinois_Pop_2021)
Illinois_Pop_2022 <- normalize_colnames(Illinois_Pop_2022)
Illinois_Pop_2023 <- normalize_colnames(Illinois_Pop_2023)
Illinois_Pop_2024 <- normalize_colnames(Illinois_Pop_2024)
library(lubridate)
library(dplyr)

# Snapshots whose date columns need re-parsing, keyed by year.
# NOTE(review): this list is not referenced again in the visible part of this
# document; the date fix is applied to the individual objects directly.
date_fix_dfs <- setNames(
  list(
    Illinois_Pop_2012,
    Illinois_Pop_2013,
    Illinois_Pop_2014,
    Illinois_Pop_2015,
    Illinois_Pop_2016,
    Illinois_Pop_2017
  ),
  c("2012", "2013", "2014", "2015", "2016", "2017")
)

# Columns to convert from MMDDYYYY-style values to Date objects.
# The MSR column is listed under both spellings: this vector originally used
# "..._released_..." while process_df() looks for "..._release_...". Columns
# that are absent from a given data frame are skipped by fix_dates(), so the
# extra entry is harmless where it does not apply.
date_columns <- c(
  "date_of_birth",
  "current_admission_date",
  "projected_mandatory_supervised_release_(msr)_date2",
  "projected_mandatory_supervised_released_(msr)_date2",
  "projected_discharge_date2",
  "custody_date",
  "sentence_date"
)

#' Convert MMDDYYYY-style date columns to Date objects
#'
#' @param df A data frame whose column names have already been normalised.
#' @param columns Character vector of candidate column names. Names that are
#'   not present in `df` are ignored. Defaults to the script-level
#'   `date_columns` vector.
#' @return `df` with each matching column parsed by `lubridate::mdy()`. Values
#'   that cannot be parsed become `NA` (parse warnings are suppressed).
fix_dates <- function(df, columns = date_columns) {
  for (col in intersect(columns, colnames(df))) {
    values <- df[[col]]

    # A column that was already read as a date / date-time has ISO-style text
    # ("2012-01-05"), which is not month-day-year; leave it untouched instead
    # of pushing it through mdy().
    if (inherits(values, c("Date", "POSIXt"))) {
      next
    }

    df[[col]] <- suppressWarnings(mdy(as.character(values)))
  }
  df
}

# Re-parse the date columns of the 2012-2017 snapshots in place.
for (fix_year in 2012:2017) {
  snapshot_name <- paste0("Illinois_Pop_", fix_year)
  assign(snapshot_name, fix_dates(get(snapshot_name)))
}
#' Standardise one yearly snapshot and derive age / time-served fields
#'
#' @param df One year's data frame with normalised column names. Must contain
#'   `sentence_date`, `date_of_birth`, `sentence_years` and `sentence_months`.
#' @param snapshot_date Date the snapshot represents; ages and time served are
#'   measured up to this date.
#' @return `df` with its date columns parsed and these columns added:
#'   `age_at_snapshot` and `length_of_stay` (completed years, integer),
#'   `age_bracket` ("Under 55", "55-64", "65-74", "75+", or `NA` when the age
#'   is unknown) and `emerging_adult` (logical).
process_df <- function(df, snapshot_date) {
  # Fields every snapshot is expected to carry. Non-numeric characters are
  # stripped from the sentence length fields before conversion, so purely
  # textual values end up as NA.
  df <- df %>%
    mutate(
      sentence_date = suppressWarnings(ymd(sentence_date)),
      date_of_birth = suppressWarnings(ymd(date_of_birth)),
      sentence_years = as.numeric(gsub("[^0-9.]", "", sentence_years)),
      sentence_months = as.numeric(gsub("[^0-9.]", "", sentence_months))
    )

  # Optional date columns: convert only the ones this year actually has.
  # The MSR column is accepted under both spellings used in this script.
  safe_date <- function(x) suppressWarnings(ymd(as.character(x)))
  optional_date_cols <- c(
    "current_admission_date",
    "projected_mandatory_supervised_release_(msr)_date2",
    "projected_mandatory_supervised_released_(msr)_date2",
    "projected_discharge_date2",
    "custody_date"
  )
  for (col in intersect(optional_date_cols, colnames(df))) {
    df[[col]] <- safe_date(df[[col]])
  }

  # Completed years between two dates; NA in either date gives NA.
  whole_years <- function(from, to) {
    as.integer(interval(start = from, end = to) / years(1))
  }

  df %>%
    mutate(
      age_at_snapshot = whole_years(date_of_birth, snapshot_date),
      length_of_stay = whole_years(sentence_date, snapshot_date),
      age_bracket = case_when(
        # Unknown age must not be counted as "Under 55".
        is.na(age_at_snapshot) ~ NA_character_,
        age_at_snapshot >= 75 ~ "75+",
        age_at_snapshot >= 65 ~ "65-74",
        age_at_snapshot >= 55 ~ "55-64",
        TRUE ~ "Under 55"
      ),
      # Emerging adult = under 25 when sentenced AND 15+ years served since.
      # Testing the age at the snapshot instead would require a sentence
      # before age 10, so the flag would be (almost) always FALSE.
      # sentence_date is used because length_of_stay is measured from it.
      emerging_adult = whole_years(date_of_birth, sentence_date) < 25 &
        length_of_stay >= 15
    )
}
# All yearly snapshots, in chronological order (2005 first).
# The list is deliberately left unnamed: the merge step further down derives
# the year from each element's position.
data_list <- list(
  Illinois_Pop_2005, Illinois_Pop_2006, Illinois_Pop_2007, Illinois_Pop_2008, Illinois_Pop_2009,
  Illinois_Pop_2010, Illinois_Pop_2011, Illinois_Pop_2012, Illinois_Pop_2013, Illinois_Pop_2014,
  Illinois_Pop_2015, Illinois_Pop_2016, Illinois_Pop_2017, Illinois_Pop_2018, Illinois_Pop_2019,
  Illinois_Pop_2020, Illinois_Pop_2021, Illinois_Pop_2022, Illinois_Pop_2023, Illinois_Pop_2024
)

# Process each dataframe
# Create the snapshot date for each year
library(lubridate)

# The source file names show that only the 2005 extract is a June snapshot;
# 2006-2024 are December extracts. Measuring those years against June 30
# would understate every age and time served by about six months.
# NOTE(review): December extracts are assumed to be as of December 31 (the
# 2019 file name says "dec-31"); confirm for the other years.
snapshot_dates <- c(
  ymd("2005-06-30"),
  seq(ymd("2006-12-31"), ymd("2024-12-31"), by = "years")
)
stopifnot(length(snapshot_dates) == length(data_list))

processed_data <- Map(process_df, data_list, snapshot_dates)
# Columns carried into the combined analysis table. Names that a given year
# does not have are simply dropped for that year.
analysis_keep_cols <- c(
  "idoc_#", "race", "sex",
  "sentence_date", "date_of_birth",
  "sentence_years", "sentence_months",
  "year", "current_admission_date",
  "crime_class", "holding_offense", "truth_in_sentencing",
  "age_at_snapshot", "length_of_stay", "age_bracket", "emerging_adult"
)

# Trim every processed snapshot to the shared columns.
trimmed_snapshots <- lapply(processed_data, function(snapshot) {
  keep <- intersect(analysis_keep_cols, names(snapshot))
  snapshot[, keep]
})

# Stack the snapshots. `.id` records each element's position in the list
# (1 = first snapshot), so adding 2004 turns it into the calendar year.
analysis_data <- bind_rows(trimmed_snapshots, .id = "year")
analysis_data <- mutate(analysis_data, year = as.integer(year) + 2004)
# Mark everyone whose time served at the snapshot is 15 years or more
# (NA when the time served is unknown).
analysis_data <- mutate(analysis_data, long_term_served = length_of_stay >= 15)

# Per-year head count, number flagged as long-term, and their share in percent.
# Rows with an unknown flag stay in the denominator.
long_term_by_year <- group_by(analysis_data, year)
long_term_summary <- summarise(
  long_term_by_year,
  total_incarcerated = n(),
  long_term_count = sum(long_term_served, na.rm = TRUE),
  long_term_percent = round(100 * long_term_count / total_incarcerated, 1)
)
# Yearly share of the population that has served 15+ years.
# `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0);
# `size` is still the right argument for points.
ggplot(long_term_summary, aes(x = year, y = long_term_percent)) +
  geom_line(linewidth = 1.2, color = "firebrick") +
  geom_point(size = 2, color = "firebrick") +
  labs(
    title = "Growth of Individuals Who Have Served 15+ Years in Prison (Illinois, 2005–2024)",
    x = "Year",
    y = "Percent of Prison Population",
    # The loaded files are a June extract for 2005 and December extracts after.
    caption = "Source: IDOC snapshot data (June 2005; December 2006–2024)"
  ) +
  theme_minimal()
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

# Head count per year for each of the 55+ age brackets.
aging_trends <- analysis_data %>%
  filter(age_bracket %in% c("55-64", "65-74", "75+")) %>%
  group_by(year, age_bracket) %>%
  summarise(count = n(), .groups = "drop")

# One line per bracket. `linewidth` replaces the `size` argument that ggplot2
# deprecated for lines in 3.4.0.
ggplot(aging_trends, aes(x = year, y = count, color = age_bracket)) +
  geom_line(linewidth = 1.2) +
  labs(
    title = "Aging Prison Population in Illinois (2005–2024)",
    x = "Year",
    y = "Number of Incarcerated Individuals",
    color = "Age Bracket"
  ) +
  theme_minimal()

# Share of each year's population whose sentence length is 15+ years.
# This is based on the sentence imposed (`sentence_years`), not time served.
# NOTE(review): sentences recorded as text only (no digits) were converted to
# NA upstream and therefore are not counted as long-term here — confirm how
# life sentences are coded in the source files.
long_term_trend <- analysis_data %>%
  mutate(long_term = sentence_years >= 15) %>%
  group_by(year) %>%
  summarise(
    total = n(),
    long_term = sum(long_term, na.rm = TRUE),
    percent_long_term = round(100 * long_term / total, 1),
    .groups = "drop"
  )

# `linewidth` replaces the `size` argument deprecated for lines in
# ggplot2 3.4.0.
ggplot(long_term_trend, aes(x = year, y = percent_long_term)) +
  geom_line(linewidth = 1.2, color = "firebrick") +
  labs(
    title = "Share of People Serving Long Prison Terms (15+ years)",
    x = "Year",
    y = "Percent (%)"
  ) +
  theme_minimal()

# Number of people with a 15+ year sentence, per year and race.
# filter() drops rows whose sentence length is NA.
race_long_term <- analysis_data %>%
  filter(sentence_years >= 15) %>%
  group_by(year, race) %>%
  summarise(count = n(), .groups = "drop")

# `linewidth` replaces the `size` argument deprecated for lines in
# ggplot2 3.4.0.
ggplot(race_long_term, aes(x = year, y = count, color = race)) +
  geom_line(linewidth = 1.2) +
  labs(
    title = "Racial Disparities in Long Prison Terms (15+ years)",
    x = "Year",
    y = "Number of People",
    color = "Race"
  ) +
  theme_minimal()

# Per year and sex: head count, number who have served 15+ years, and their
# share of that group in percent.
long_term_by_sex <- analysis_data %>%
  group_by(year, sex) %>%
  summarise(
    total = n(),
    long_term = sum(long_term_served, na.rm = TRUE),
    percent_long_term = round(100 * long_term / total, 1),
    .groups = "drop"
  )

# `linewidth` replaces the `size` argument deprecated for lines in
# ggplot2 3.4.0.
ggplot(long_term_by_sex, aes(x = year, y = percent_long_term, color = sex)) +
  geom_line(linewidth = 1.2) +
  labs(
    title = "Percent of Prison Population Who Have Served 15+ Years (By Sex)",
    x = "Year", y = "Percent (%)",
    color = "Sex"
  ) +
  theme_minimal()

# Yearly count of people flagged as emerging adults; rows where the flag is
# NA are dropped by filter().
emerging_adults_long_term <- analysis_data %>%
  filter(emerging_adult == TRUE) %>%
  group_by(year) %>%
  summarise(
    count = n(),
    .groups = "drop"
  )

# `linewidth` replaces the `size` argument deprecated for lines in
# ggplot2 3.4.0; points keep `size`.
ggplot(emerging_adults_long_term, aes(x = year, y = count)) +
  geom_line(color = "darkgreen", linewidth = 1.2) +
  geom_point(size = 2, color = "darkgreen") +
  labs(
    title = "Emerging Adults Serving 15+ Years: Snapshot Growth Over Time",
    x = "Year",
    y = "Number of People",
    caption = "Emerging adults = under 25 at time of admission and served 15+ years"
  ) +
  theme_minimal()

# Per year and race: head count, number who have served 15+ years, and their
# share of that group in percent.
long_term_by_race <- analysis_data %>%
  group_by(year, race) %>%
  summarise(
    total = n(),
    long_term = sum(long_term_served, na.rm = TRUE),
    percent_long_term = round(100 * long_term / total, 1),
    .groups = "drop"
  )

# `linewidth` replaces the `size` argument deprecated for lines in
# ggplot2 3.4.0.
ggplot(long_term_by_race, aes(x = year, y = percent_long_term, color = race)) +
  geom_line(linewidth = 1.2) +
  labs(
    title = "Percent of Prison Population Who Have Served 15+ Years (By Race)",
    x = "Year", y = "Percent (%)",
    color = "Race"
  ) +
  theme_minimal()

# Everyone aged 55 or older at their snapshot; reused by the race and sex
# breakdowns further down.
aging_data <- analysis_data %>%
  filter(age_bracket %in% c("55-64", "65-74", "75+"))

# Head count per year and age bracket.
aging_by_year <- aging_data %>%
  group_by(year, age_bracket) %>%
  summarise(count = n(), .groups = "drop")

# `linewidth` replaces the `size` argument deprecated for lines in
# ggplot2 3.4.0; points keep `size`.
ggplot(aging_by_year, aes(x = year, y = count, color = age_bracket)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  labs(
    title = "Growth of Aging Prison Population in Illinois (Ages 55+)",
    x = "Year",
    y = "Number of Incarcerated People",
    color = "Age Bracket",
    caption = "Source: IDOC snapshot data"
  ) +
  theme_minimal()

# Per year: head count, number aged 55+, and their share in percent.
# People with an unknown age stay in the denominator but are not counted
# as aging.
aging_summary <- analysis_data %>%
  mutate(is_aging = age_at_snapshot >= 55) %>%
  group_by(year) %>%
  summarise(
    total = n(),
    aging_count = sum(is_aging, na.rm = TRUE),
    percent_aging = round(100 * aging_count / total, 1),
    .groups = "drop"
  )

# `linewidth` replaces the `size` argument deprecated for lines in
# ggplot2 3.4.0; points keep `size`.
ggplot(aging_summary, aes(x = year, y = percent_aging)) +
  geom_line(linewidth = 1.2, color = "steelblue") +
  geom_point(size = 2, color = "steelblue") +
  labs(
    title = "Percent of Illinois Prison Population Aged 55 and Older",
    x = "Year", y = "Percent (%)",
    caption = "Includes all individuals age 55+ at snapshot"
  ) +
  theme_minimal()

# Number of people aged 55+ per year and race.
aging_by_race <- aging_data %>%
  group_by(year, race) %>%
  summarise(count = n(), .groups = "drop")

# `linewidth` replaces the `size` argument deprecated for lines in
# ggplot2 3.4.0.
ggplot(aging_by_race, aes(x = year, y = count, color = race)) +
  geom_line(linewidth = 1.2) +
  labs(
    title = "Aging Prison Population (55+) by Race",
    x = "Year", y = "Number of People",
    color = "Race"
  ) +
  theme_minimal()

# Number of people aged 55+ per year and sex.
aging_by_sex <- aging_data %>%
  group_by(year, sex) %>%
  summarise(count = n(), .groups = "drop")

# `linewidth` replaces the `size` argument deprecated for lines in
# ggplot2 3.4.0.
ggplot(aging_by_sex, aes(x = year, y = count, color = sex)) +
  geom_line(linewidth = 1.2) +
  labs(
    title = "Aging Prison Population (55+) by Sex",
    x = "Year", y = "Number of People",
    color = "Sex"
  ) +
  theme_minimal()