# Analyze incarceration data
# Load R environment ---------
renv::activate()
# Load packages ---------
library(here)
## here() starts at /users/akhann16/code/cadre/data-analysis-plotting/Simulated-Data-Analysis/r/agent-analysis
library(data.table)
library(yaml)
library(ggplot2)
# Utility functions ------------
#source(here("agent-analysis", "utils", "post-release-smoking.R"))
source("/users/akhann16/code/cadre/data-analysis-plotting/Simulated-Data-Analysis/r/agent-analysis/utils/post-release-smoking.R")
# Read RDS file ------------
#agent_log_env <- readRDS(here("agent-analysis", "rds-outs", "agent_log_env.RDS"))
agent_log_env <- readRDS("/users/akhann16/code/cadre/data-analysis-plotting/Simulated-Data-Analysis/r/agent-analysis/rds-outs/agent_log_env.RDS")
# Load data ------------
agent_dt <- agent_log_env[["agent_dt"]]
input_params <- agent_log_env[["input_params"]]
# Tick variables ------------
last_tick <- max(agent_dt$tick)
selected_ticks <- c(seq(1, last_tick, by = 10), last_tick)
last_tick_data <- agent_dt[tick==last_tick,]
# Smoking ratio at last tick of current smoking to all persons ----------
smoking_ratio <-
last_tick_data[, .(
n_current_smokers = sum(smoking_status == "Current"),
n_total_persons = sum(smoking_status %in% c("Current", "Former", "Never"))
), by = .(group = n_releases >= 1)]
# Compute the ratio of current smokers to total smokers in each group
smoking_ratio
## group n_current_smokers n_total_persons
## 1: FALSE 1252 9604
## 2: TRUE 133 396
smoking_ratio[, ratio := n_current_smokers / n_total_persons][]
## group n_current_smokers n_total_persons ratio
## 1: FALSE 1252 9604 0.1303623
## 2: TRUE 133 396 0.3358586
# Compute change in current smoking over time since release ----------
## computation
time_periods <- c(1, 7, 14, 30, 90, 180, 365)
labels <- c("1D", "1W", "2W", "1M", "3M", "6M", "1Y")
summaries_smoking <- lapply(time_periods, function(x) summary_after_release(x))
summaries_smoking
## [[1]]
## [[1]]$proportions
## Category Proportion
## 1 Current 0.7310345
## 2 Former 0.1103448
## 3 Never 0.1586207
##
## [[1]]$agent_count
## [1] 145
##
##
## [[2]]
## [[2]]$proportions
## Category Proportion
## 1 Current 0.80916031
## 2 Former 0.06965649
## 3 Never 0.12118321
##
## [[2]]$agent_count
## [1] 1048
##
##
## [[3]]
## [[3]]$proportions
## Category Proportion
## 1 Current 0.80037665
## 2 Former 0.07532957
## 3 Never 0.12429379
##
## [[3]]$agent_count
## [1] 2124
##
##
## [[4]]
## [[4]]$proportions
## Category Proportion
## 1 Current 0.7882743
## 2 Former 0.0869469
## 3 Never 0.1247788
##
## [[4]]$agent_count
## [1] 4520
##
##
## [[5]]
## [[5]]$proportions
## Category Proportion
## 1 Current 0.7375224
## 2 Former 0.1306548
## 3 Never 0.1318228
##
## [[5]]$agent_count
## [1] 12843
##
##
## [[6]]
## [[6]]$proportions
## Category Proportion
## 1 Current 0.6717852
## 2 Former 0.1871198
## 3 Never 0.1410950
##
## [[6]]$agent_count
## [1] 23835
##
##
## [[7]]
## [[7]]$proportions
## Category Proportion
## 1 Current 0.5891743
## 2 Former 0.2515474
## 3 Never 0.1592783
##
## [[7]]$agent_count
## [1] 41845
# Visualization (smoking in released persons) ----------
current_smoker_proportions <- sapply(
summaries_smoking,
function(x) {
if ("Current" %in% x$proportions$Category){
return(x$proportions[x$proportions$Category == "Current", "Proportion"])
}
else {
return(0)
}
}
)
current_smoker_df <- data.frame(Time = time_periods,
Labels = labels,
Proportion = current_smoker_proportions)
p <-
ggplot(current_smoker_df, aes(x = Time, y = Proportion, group = 1)) +
geom_line() +
geom_point() +
scale_x_continuous(breaks = time_periods, labels = labels) +
scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
labs(title = "",
x = "Time After Release",
y = "Proportion of Current Smoking") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Visualization (smoking in general population) ----------
# calculate the proportion of current, former, and never smokers at the specified time ticks
smokers_by_tick <- agent_dt[tick %in% selected_ticks, .(
current_smokers = sum(smoking_status == "Current") / .N,
former_smokers = sum(smoking_status == "Former") / .N,
never_smokers = sum(smoking_status == "Never") / .N
), by = tick]
# mean from the the last last 10 rows
mean_current_smoker <- mean(tail(smokers_by_tick$current_smokers, 10))
# create a new data frame for the general population
general_population <- data.frame(
Time = current_smoker_df$Time,
Proportion = rep(mean_current_smoker, length(current_smoker_df$Time))
)
# create the plot
p <- ggplot() +
geom_line(data = current_smoker_df, aes(x = Time, y = Proportion, color = "Previously Incarcerated"), linewidth=1.5) +
geom_line(data = general_population, aes(x = Time, y = Proportion, color = "Overall"), linewidth=1.5) +
scale_color_manual(values = c("Previously Incarcerated" = "blue", "Overall" = "red")) +
scale_x_continuous(breaks = time_periods, labels = labels) +
scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
labs(
title = "",
x = "Time After Release",
y = "",
color = "Current Smoking Rate"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
text = element_text(size = 24, face = "bold"))
p
