# Analyze incarceration data (This is from the agent log, there is a separate incarceartion log)
rm(list=ls())
# Load R environment ---------
renv::activate()
# Load packages ---------
library(here)
## here() starts at /users/akhann16/code/cadre/data-analysis-plotting/Simulated-Data-Analysis/r/agent-log-analysis
library(data.table)
library(yaml)
library(ggplot2)
# Read RDS file ------------
#agent_log_env <- readRDS(here("agent-analysis", "rds-outs", "agent_log_env.RDS"))
agent_log_env <- readRDS("/users/akhann16/code/cadre/data-analysis-plotting/Simulated-Data-Analysis/r/agent-log-analysis/rds-outs/agent_log_env.RDS")
# Load data ------------
agent_dt <- agent_log_env[["agent_dt"]]
input_params <- agent_log_env[["input_params"]]
# Summary ------------
last_tick <- max(agent_dt$tick)
selected_ticks <- c(seq(1, last_tick, by = 10), last_tick)
# mean incarceration rate over time
incarceration_summary <- agent_dt[tick %in% selected_ticks,
.(n_incarcerated = sum(current_incarceration_status == 1),
n_agents = .N),
by = tick]
incarceration_summary[, rate_per_100k := n_incarcerated / n_agents * 100000]
ggplot(incarceration_summary, aes(x = tick, y = rate_per_100k)) +
geom_line() +
theme_minimal() +
labs(title = "",
x = "Time (Days)",
y = "Incarceration Rate (per 100,000 persons)")+
#theme(text = element_text(size = 20, face = "bold"))+
theme(text = element_text(face = "bold"))+
ylim(c(0,500))

# number incarcerated
ggplot(incarceration_summary, aes(x = tick, y = n_incarcerated)) +
geom_line() +
theme_minimal() +
labs(title = "",
x = "Time (Days)",
y = "Number Incarcerated")

# Last tick analysis ----------
# number and proportion of incarcerated persons at last tick:
incarceration_summary <- agent_dt[tick %in% selected_ticks,
.(n_incarcerated = sum(current_incarceration_status == 1),
n_agents = .N),
by = tick]
tail(incarceration_summary)
## tick n_incarcerated n_agents
## 1: 10901 18 10001
## 2: 10911 20 10000
## 3: 10921 20 10001
## 4: 10931 18 10003
## 5: 10941 15 10002
## 6: 10950 17 10001
# race distribution
agent_dt[tick==last_tick & current_incarceration_status == 1,
.N,
by=c("race")][,"prop":=round(N/sum(N)*100, 0)][]
## race N prop
## 1: Black 12 71
## 2: White 4 24
## 3: Hispanic 1 6
# sex distribution
agent_dt[tick==last_tick & current_incarceration_status == 1,
.N,
by=c("female")][,"prop":=round(N/sum(N)*100, 0)][]
## female N prop
## 1: 0 17 100
# race-sex distribution
agent_dt[tick==last_tick & current_incarceration_status == 1,
.N,
by=c("race", "female")][,"prop":=round(N/sum(N)*100, 0)][]
## race female N prop
## 1: Black 0 12 71
## 2: White 0 4 24
## 3: Hispanic 0 1 6
# Ever incarcerated ----------
# how many?
# To answer this question, we can group the last incarceration times by agent ID.
# If last incarceration time is -1, the person has never been incarcerated.
# If last incarceration time is > -1, the person has been incarcerated at least once.
last_incarceration_time <- agent_dt[,
.(last_incarceration_time = max(last_incarceration_tick)),
by = id]
n_ever_incarcerated <- sum(last_incarceration_time$last_incarceration_time != -1)
prop_ever_incarcerated <- n_ever_incarcerated / nrow(last_incarceration_time)
# agent-level analysis of those ever incarcerated
ever_incarcerated_times <- last_incarceration_time[last_incarceration_time > 1]
dim(ever_incarcerated_times)
## [1] 573 2
## demographics
### generate dataset
ever_incarcerated_ids <-
unique(agent_dt[id %in% ever_incarcerated_times$id, id])
ever_incarcerated_info <- agent_dt[id %in% ever_incarcerated_ids,
.SD[.N, list(
id,
age = last(age),
race = last(race),
female = last(female)
)],
by = id]
### create distributions
ever_incarcerated_info[,#age
.(
mean_age = mean(age),
min_age = min(age),
max_age = max(age)
)]
## mean_age min_age max_age
## 1: 70.26003 28 84
sex_incarcerated_proportion <-
ever_incarcerated_info[, .(
Count = .N,
Proportion = round(.N / nrow(ever_incarcerated_info), 2)
),
by = .(Sex = ifelse(female==1, "Female", "Male"))
]
race_incarcerated_proportion <-
ever_incarcerated_info[, .(
Count = .N,
Proportion = round(.N / nrow(ever_incarcerated_info), 2)
),
by = .(Race=race)]
race_sex_incarcerated_proportion <-
ever_incarcerated_info[, .N, by = c("race", "female")][
#race, sex
,
.(Race = race,
Sex = ifelse(female == 1, "Female", "Male"),
Count = N,
Proportion = round(N / sum(N), 2)
)[order(c(female, race))]
]
# Race/Sex distribution in full population ----------
## defined as % group representation in incarcerated population
## relative to to representation in general population
unique_agents <- unique(agent_dt, by = "id")
dim(unique_agents)
## [1] 15735 16
str(unique_agents)
## Classes 'data.table' and 'data.frame': 15735 obs. of 16 variables:
## $ tick : int 1 1 1 1 1 1 1 1 1 1 ...
## $ id : int 2610 7868 0 1 2 3 4 5 6 7 ...
## $ age : int 84 84 83 83 41 31 23 48 77 21 ...
## $ race : chr "Asian" "White" "Hispanic" "Hispanic" ...
## $ female : int 1 1 1 0 1 0 1 0 1 0 ...
## $ alc_use_status : int 0 3 1 3 0 0 0 1 0 0 ...
## $ smoking_status : chr "Never" "Former" "Former" "Former" ...
## $ last_incarceration_tick : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ last_release_tick : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ current_incarceration_status: int 0 0 0 0 0 0 0 0 0 0 ...
## $ entry_at_tick : int 0 0 0 0 0 0 0 0 0 0 ...
## $ exit_at_tick : int 1 1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ n_incarcerations : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_releases : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_smkg_stat_trans : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_alc_use_stat_trans : int 0 0 0 0 0 0 0 0 0 0 ...
## - attr(*, ".internal.selfref")=<externalptr>
race_population_proportion <-
unique_agents[, .(
Count = .N,
Proportion = round(.N/nrow(unique_agents), 2)
),
by = .(Race = race)
]
sex_population_proportion <-
unique_agents[, .(
Count = .N,
Proportion = round(.N/nrow(unique_agents), 2)
),
by = .(Sex = ifelse(female==1, "Female", "Male"))
]
race_sex_population_proportion <-
unique_agents[, .N, by = c("race", "female")][
#race, sex
,
.(Race = race,
Sex = ifelse(female == 1, "Female", "Male"),
Count = N,
Proportion = round(N / sum(N), 2)
)[order(c(female, race))]
]
# Disparity analysis ---------
## race-sex combined
disparity_analysis <- merge(
race_sex_incarcerated_proportion,
race_sex_population_proportion,
by = c("Race", "Sex"),
suffixes = c("_Incarcerated", "_Population")
)
disparity_analysis[, Disparity_Ratio := round(Proportion_Incarcerated / Proportion_Population, 2)]
disparity_analysis <- disparity_analysis[order(-Disparity_Ratio)]
disparity_analysis
## Race Sex Count_Incarcerated Proportion_Incarcerated
## 1: Black Male 152 0.27
## 2: Hispanic Male 115 0.20
## 3: White Male 257 0.45
## 4: Black Female 17 0.03
## 5: Asian Male 5 0.01
## 6: Hispanic Female 6 0.01
## 7: White Female 20 0.03
## 8: Asian Female 1 0.00
## Proportion_Population Count_Population Disparity_Ratio
## 1: 0.04 660 6.75
## 2: 0.08 1225 2.50
## 3: 0.35 5466 1.29
## 4: 0.04 675 0.75
## 5: 0.02 305 0.50
## 6: 0.08 1290 0.12
## 7: 0.37 5795 0.08
## 8: 0.02 319 0.00
## race only
disparity_analysis_race <- merge(
race_incarcerated_proportion,
race_population_proportion,
by = c("Race"),
suffixes = c("_Incarcerated", "_Population")
)
disparity_analysis_race[, Disparity_Ratio := round(Proportion_Incarcerated / Proportion_Population, 2)]
disparity_analysis_race[order(-Disparity_Ratio)]
## Race Count_Incarcerated Proportion_Incarcerated Count_Population
## 1: Black 169 0.29 1335
## 2: Hispanic 121 0.21 2515
## 3: White 277 0.48 11261
## 4: Asian 6 0.01 624
## Proportion_Population Disparity_Ratio
## 1: 0.08 3.62
## 2: 0.16 1.31
## 3: 0.72 0.67
## 4: 0.04 0.25
## sex only
disparity_analysis_sex <- merge(
sex_incarcerated_proportion,
sex_population_proportion,
by = c("Sex"),
suffixes = c("_Incarcerated", "_Population")
)
disparity_analysis_sex[, Disparity_Ratio := round(Proportion_Population / Proportion_Incarcerated, 2)]
disparity_analysis_sex[order(-Disparity_Ratio)]
## Sex Count_Incarcerated Proportion_Incarcerated Count_Population
## 1: Female 44 0.08 8079
## 2: Male 529 0.92 7656
## Proportion_Population Disparity_Ratio
## 1: 0.51 6.38
## 2: 0.49 0.53
# Visualizing incarceration disparity by sex and race -----------
# sex
sex_data <- data.frame(
Sex = rep(c("Female", "Male"), each = 2),
Count = c(43, 8080, 527, 7622),
Proportion = c(0.08, 0.51, 0.92, 0.49),
PopulationType = rep(c("Incarcerated", "General"), 2)
)
ggplot(sex_data, aes(x = Sex, y = Proportion, fill = PopulationType)) +
geom_bar(stat = "identity", position = position_dodge()) +
scale_fill_manual(values = c("General" = "blue", "Incarcerated" = "red")) +
scale_y_continuous(breaks = seq(0, 1, 0.25), limits = c(0, 1))+
labs(title = "",
x = "Sex",
y = "Proportion",
fill = "Population Type") +
theme_minimal()+
theme(text=element_text(face = "bold", size = 20))

# race
race_data <- data.frame(
Race = rep(c("Black", "Hispanic", "White", "Asian"), each = 2),
Count = c(188, 1310, 123, 2580, 254, 11214, 5, 598),
Proportion = c(0.33, 0.08, 0.22, 0.16, 0.45, 0.71, 0.01, 0.04),
PopulationType = rep(c("Incarcerated", "General"), 4)
)
ggplot(race_data, aes(x = Race, y = Proportion, fill = PopulationType)) +
geom_bar(stat = "identity", position = position_dodge()) +
scale_fill_manual(values = c("General" = "blue", "Incarcerated" = "red")) +
scale_y_continuous(breaks = seq(0, 1, 0.25), limits = c(0, 1))+
labs(title = "",
x = "Race",
y = "Proportion",
fill = "Population Type") +
theme_minimal()+
theme(text=element_text(face="bold", size=20))

# current smoking
race_data <- data.frame(
Race = rep(c("Black", "Hispanic", "White", "Asian"), each = 2),
Count = c(188, 1310, 123, 2580, 254, 11214, 5, 598),
Proportion = c(0.33, 0.08, 0.22, 0.16, 0.45, 0.71, 0.01, 0.04),
PopulationType = rep(c("Incarcerated", "General"), 4)
)
# Visualize incarceration disparity by smoking -----------
incarcerated <-
agent_dt[tick %in% selected_ticks & current_incarceration_status==1,
.N,
by=c("smoking_status")][,"prop":=round(N/sum(N), 3)][order(smoking_status)]
incarcerated$group <- "Incarcerated"
general_pop <-
agent_dt[tick %in% selected_ticks,
.N,
by=c("smoking_status")][,"prop":=round(N/sum(N), 3)][order(smoking_status)]
general_pop$group <- "General"
combined_data <- rbind(incarcerated, general_pop)
combined_data$smoking_status <- factor(combined_data$smoking_status,
levels = c("Current", "Former", "Never"))
ggplot(combined_data, aes(x = smoking_status, y = prop, fill = group)) +
geom_bar(stat = "identity", position = position_dodge()) +
scale_fill_manual(values = c("General" = "blue", "Incarcerated" = "red")) +
labs(title = "",
x = "Smoking Status",
y = "Proportion (%)",
fill = "Population Type")+
ylim(c(0,1))+
theme_minimal() +
#theme(text = element_text(size = 20, face = "bold"))
theme(text = element_text(face = "bold"))

# Visualize incarceration disparity by alcohol status -----------
incarcerated2 <-
agent_dt[tick %in% selected_ticks & current_incarceration_status == 1,
.N,
by=c("alc_use_status")][,"prop":=round(N/sum(N), 3)][order(alc_use_status)]
incarcerated2$group <- "Incarcerated"
general_pop2 <-
agent_dt[tick %in% selected_ticks,
.N,
by=c("alc_use_status")][,"prop":=round(N/sum(N), 3)][order(alc_use_status)]
general_pop2$group <- "General"
combined_data2 <- rbind(incarcerated2, general_pop2)
ggplot(combined_data2, aes(x = alc_use_status, y = prop, fill = group)) +
geom_bar(stat = "identity", position = position_dodge()) +
scale_fill_manual(values = c("General" = "blue", "Incarcerated" = "red")) +
labs(title = "",
x = "Alcohol Use",
y = "Proportion (%)",
fill = "Population Type")+
ylim(c(0,1)) +
theme_minimal() +
#theme(text = element_text(size = 20, face = "bold"))
theme()
