# Analyze incarceration data (This is from the agent log, there is a separate incarceartion log)
rm(list=ls())
# Load R environment ---------
renv::activate()
# Load packages ---------
library(here)
## here() starts at /users/akhann16/code/cadre/data-analysis-plotting/Simulated-Data-Analysis/r/agent-log-analysis
library(data.table)
library(yaml)
library(ggplot2)
# Read RDS file ------------
#agent_log_env <- readRDS(here("agent-analysis", "rds-outs", "agent_log_env.RDS"))
agent_log_env <- readRDS("/users/akhann16/code/cadre/data-analysis-plotting/Simulated-Data-Analysis/r/agent-log-analysis/rds-outs/agent_log_env.RDS")
# Load data ------------
agent_dt <- agent_log_env[["agent_dt"]]
input_params <- agent_log_env[["input_params"]]
# Summary ------------
last_tick <- max(agent_dt$tick)
selected_ticks <- c(seq(1, last_tick, by = 10), last_tick)
# mean incarceration rate over time
incarceration_summary <- agent_dt[tick %in% selected_ticks,
.(n_incarcerated = sum(current_incarceration_status == 1),
n_agents = .N),
by = tick]
incarceration_summary[, rate_per_100k := n_incarcerated / n_agents * 100000]
ggplot(incarceration_summary, aes(x = tick, y = rate_per_100k)) +
geom_line() +
theme_minimal() +
labs(title = "",
x = "Time (Days)",
y = "Incarceration Rate (per 100,000 persons)")+
theme(text = element_text(size = 20, face = "bold"))+
ylim(c(0,500))

# number incarcerated
ggplot(incarceration_summary, aes(x = tick, y = n_incarcerated)) +
geom_line() +
theme_minimal() +
labs(title = "",
x = "Time (Days)",
y = "Number Incarcerated")

# Last tick analysis ----------
# number and proportion of incarcerated persons at last tick:
incarceration_summary <- agent_dt[tick %in% selected_ticks,
.(n_incarcerated = sum(current_incarceration_status == 1),
n_agents = .N),
by = tick]
tail(incarceration_summary)
## tick n_incarcerated n_agents
## 1: 10901 15 10000
## 2: 10911 15 10002
## 3: 10921 16 10001
## 4: 10931 13 10001
## 5: 10941 12 10000
## 6: 10950 13 10000
# race distribution
agent_dt[tick==last_tick & current_incarceration_status == 1,
.N,
by=c("race")][,"prop":=round(N/sum(N)*100, 0)][]
## race N prop
## 1: Black 5 38
## 2: Hispanic 3 23
## 3: White 5 38
# sex distribution
agent_dt[tick==last_tick & current_incarceration_status == 1,
.N,
by=c("female")][,"prop":=round(N/sum(N)*100, 0)][]
## female N prop
## 1: 0 13 100
# race-sex distribution
agent_dt[tick==last_tick & current_incarceration_status == 1,
.N,
by=c("race", "female")][,"prop":=round(N/sum(N)*100, 0)][]
## race female N prop
## 1: Black 0 5 38
## 2: Hispanic 0 3 23
## 3: White 0 5 38
# Ever incarcerated ----------
# how many?
# To answer this question, we can group the last incarceration times by agent ID.
# If last incarceration time is -1, the person has never been incarcerated.
# If last incarceration time is > -1, the person has been incarcerated at least once.
last_incarceration_time <- agent_dt[,
.(last_incarceration_time = max(last_incarceration_tick)),
by = id]
n_ever_incarcerated <- sum(last_incarceration_time$last_incarceration_time != -1)
prop_ever_incarcerated <- n_ever_incarcerated / nrow(last_incarceration_time)
# agent-level analysis of those ever incarcerated
ever_incarcerated_times <- last_incarceration_time[last_incarceration_time > 1]
dim(ever_incarcerated_times)
## [1] 538 2
## demographics
### generate dataset
ever_incarcerated_ids <-
unique(agent_dt[id %in% ever_incarcerated_times$id, id])
ever_incarcerated_info <- agent_dt[id %in% ever_incarcerated_ids,
.SD[.N, list(
id,
age = last(age),
race = last(race),
female = last(female)
)],
by = id]
### create distributions
ever_incarcerated_info[,#age
.(
mean_age = mean(age),
min_age = min(age),
max_age = max(age)
)]
## mean_age min_age max_age
## 1: 69.3197 24 84
sex_incarcerated_proportion <-
ever_incarcerated_info[, .(
Count = .N,
Proportion = round(.N / nrow(ever_incarcerated_info), 2)
),
by = .(Sex = ifelse(female==1, "Female", "Male"))
]
race_incarcerated_proportion <-
ever_incarcerated_info[, .(
Count = .N,
Proportion = round(.N / nrow(ever_incarcerated_info), 2)
),
by = .(Race=race)]
race_sex_incarcerated_proportion <-
ever_incarcerated_info[, .N, by = c("race", "female")][
#race, sex
,
.(Race = race,
Sex = ifelse(female == 1, "Female", "Male"),
Count = N,
Proportion = round(N / sum(N), 2)
)[order(c(female, race))]
]
# Race/Sex distribution in full population ----------
## defined as % group representation in incarcerated population
## relative to to representation in general population
unique_agents <- unique(agent_dt, by = "id")
dim(unique_agents)
## [1] 15849 16
str(unique_agents)
## Classes 'data.table' and 'data.frame': 15849 obs. of 16 variables:
## $ tick : int 1 1 1 1 1 1 1 1 1 1 ...
## $ id : int 0 1 2 3 4 5 6 7 8 9 ...
## $ age : int 53 63 30 27 60 18 22 29 72 43 ...
## $ race : chr "White" "White" "White" "White" ...
## $ female : int 1 0 1 1 0 0 1 1 0 0 ...
## $ alc_use_status : int 0 0 0 1 1 1 1 0 0 1 ...
## $ smoking_status : chr "Former" "Never" "Never" "Never" ...
## $ last_incarceration_tick : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ last_release_tick : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ current_incarceration_status: int 0 0 0 0 0 0 0 0 0 0 ...
## $ entry_at_tick : int 0 0 0 0 0 0 0 0 0 0 ...
## $ exit_at_tick : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ n_incarcerations : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_releases : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_smkg_stat_trans : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_alc_use_stat_trans : int 0 0 0 0 0 0 0 0 0 0 ...
## - attr(*, ".internal.selfref")=<externalptr>
race_population_proportion <-
unique_agents[, .(
Count = .N,
Proportion = round(.N/nrow(unique_agents), 2)
),
by = .(Race = race)
]
sex_population_proportion <-
unique_agents[, .(
Count = .N,
Proportion = round(.N/nrow(unique_agents), 2)
),
by = .(Sex = ifelse(female==1, "Female", "Male"))
]
race_sex_population_proportion <-
unique_agents[, .N, by = c("race", "female")][
#race, sex
,
.(Race = race,
Sex = ifelse(female == 1, "Female", "Male"),
Count = N,
Proportion = round(N / sum(N), 2)
)[order(c(female, race))]
]
# Disparity analysis ---------
## race-sex combined
disparity_analysis <- merge(
race_sex_incarcerated_proportion,
race_sex_population_proportion,
by = c("Race", "Sex"),
suffixes = c("_Incarcerated", "_Population")
)
disparity_analysis[, Disparity_Ratio := round(Proportion_Incarcerated / Proportion_Population, 2)]
disparity_analysis <- disparity_analysis[order(-Disparity_Ratio)]
disparity_analysis
## Race Sex Count_Incarcerated Proportion_Incarcerated Count_Population
## 1: Black Male 158 0.29 696
## 2: Hispanic Male 100 0.19 1279
## 3: White Male 233 0.43 5484
## 4: Black Female 14 0.03 686
## 5: Asian Male 7 0.01 299
## 6: Hispanic Female 7 0.01 1298
## 7: White Female 19 0.04 5818
## Proportion_Population Disparity_Ratio
## 1: 0.04 7.25
## 2: 0.08 2.38
## 3: 0.35 1.23
## 4: 0.04 0.75
## 5: 0.02 0.50
## 6: 0.08 0.12
## 7: 0.37 0.11
## race only
disparity_analysis_race <- merge(
race_incarcerated_proportion,
race_population_proportion,
by = c("Race"),
suffixes = c("_Incarcerated", "_Population")
)
disparity_analysis_race[, Disparity_Ratio := round(Proportion_Incarcerated / Proportion_Population, 2)]
disparity_analysis_race[order(-Disparity_Ratio)]
## Race Count_Incarcerated Proportion_Incarcerated Count_Population
## 1: Black 172 0.32 1382
## 2: Hispanic 107 0.20 2577
## 3: White 252 0.47 11302
## 4: Asian 7 0.01 588
## Proportion_Population Disparity_Ratio
## 1: 0.09 3.56
## 2: 0.16 1.25
## 3: 0.71 0.66
## 4: 0.04 0.25
## sex only
disparity_analysis_sex <- merge(
sex_incarcerated_proportion,
sex_population_proportion,
by = c("Sex"),
suffixes = c("_Incarcerated", "_Population")
)
disparity_analysis_sex[, Disparity_Ratio := round(Proportion_Population / Proportion_Incarcerated, 2)]
disparity_analysis_sex[order(-Disparity_Ratio)]
## Sex Count_Incarcerated Proportion_Incarcerated Count_Population
## 1: Female 40 0.07 8091
## 2: Male 498 0.93 7758
## Proportion_Population Disparity_Ratio
## 1: 0.51 7.29
## 2: 0.49 0.53
# Visualizing incarceration disparity by sex and race -----------
# sex
sex_data <- data.frame(
Sex = rep(c("Female", "Male"), each = 2),
Count = c(43, 8080, 527, 7622),
Proportion = c(0.08, 0.51, 0.92, 0.49),
PopulationType = rep(c("Incarcerated", "General"), 2)
)
ggplot(sex_data, aes(x = Sex, y = Proportion, fill = PopulationType)) +
geom_bar(stat = "identity", position = position_dodge()) +
scale_fill_manual(values = c("General" = "blue", "Incarcerated" = "red")) +
scale_y_continuous(breaks = seq(0, 1, 0.25), limits = c(0, 1))+
labs(title = "",
x = "Sex",
y = "Proportion",
fill = "Population Type") +
theme_minimal()+
theme(text=element_text(face = "bold", size = 20))

# race
race_data <- data.frame(
Race = rep(c("Black", "Hispanic", "White", "Asian"), each = 2),
Count = c(188, 1310, 123, 2580, 254, 11214, 5, 598),
Proportion = c(0.33, 0.08, 0.22, 0.16, 0.45, 0.71, 0.01, 0.04),
PopulationType = rep(c("Incarcerated", "General"), 4)
)
ggplot(race_data, aes(x = Race, y = Proportion, fill = PopulationType)) +
geom_bar(stat = "identity", position = position_dodge()) +
scale_fill_manual(values = c("General" = "blue", "Incarcerated" = "red")) +
scale_y_continuous(breaks = seq(0, 1, 0.25), limits = c(0, 1))+
labs(title = "",
x = "Race",
y = "Proportion",
fill = "Population Type") +
theme_minimal()+
theme(text=element_text(face="bold", size=20))

# current smoking
race_data <- data.frame(
Race = rep(c("Black", "Hispanic", "White", "Asian"), each = 2),
Count = c(188, 1310, 123, 2580, 254, 11214, 5, 598),
Proportion = c(0.33, 0.08, 0.22, 0.16, 0.45, 0.71, 0.01, 0.04),
PopulationType = rep(c("Incarcerated", "General"), 4)
)
# Visualize incarceration disparity by smoking -----------
incarcerated <-
agent_dt[tick %in% selected_ticks & current_incarceration_status==1,
.N,
by=c("smoking_status")][,"prop":=round(N/sum(N), 3)][order(smoking_status)]
incarcerated$group <- "Incarcerated"
general_pop <-
agent_dt[tick %in% selected_ticks,
.N,
by=c("smoking_status")][,"prop":=round(N/sum(N), 3)][order(smoking_status)]
general_pop$group <- "General"
combined_data <- rbind(incarcerated, general_pop)
combined_data$smoking_status <- factor(combined_data$smoking_status,
levels = c("Current", "Former", "Never"))
ggplot(combined_data, aes(x = smoking_status, y = prop, fill = group)) +
geom_bar(stat = "identity", position = position_dodge()) +
scale_fill_manual(values = c("General" = "blue", "Incarcerated" = "red")) +
labs(title = "",
x = "Smoking Status",
y = "Proportion (%)",
fill = "Population Type")+
ylim(c(0,1))+
theme_minimal() +
theme(text = element_text(size = 20, face = "bold"))

# Visualize incarceration disparity by alcohol status -----------
incarcerated2 <-
agent_dt[tick %in% selected_ticks & current_incarceration_status == 1,
.N,
by=c("alc_use_status")][,"prop":=round(N/sum(N), 3)][order(alc_use_status)]
incarcerated2$group <- "Incarcerated"
general_pop2 <-
agent_dt[tick %in% selected_ticks,
.N,
by=c("alc_use_status")][,"prop":=round(N/sum(N), 3)][order(alc_use_status)]
general_pop2$group <- "General"
combined_data2 <- rbind(incarcerated2, general_pop2)
ggplot(combined_data2, aes(x = alc_use_status, y = prop, fill = group)) +
geom_bar(stat = "identity", position = position_dodge()) +
scale_fill_manual(values = c("General" = "blue", "Incarcerated" = "red")) +
labs(title = "",
x = "Alcohol Use",
y = "Proportion (%)",
fill = "Population Type")+
ylim(c(0,1)) +
theme_minimal() +
theme(text = element_text(size = 20, face = "bold"))
