# Analyze incarceration data

rm(list=ls())
  
renv::activate()


# Load packages ---------

library(here)
## here() starts at /users/akhann16/code/cadre/data-analysis-plotting/Simulated-Data-Analysis/r/agent-analysis
library(data.table)
library(yaml)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Utility functions ------------

#source(here("agent-analysis", "utils", "post-release-alcohol.R"))
#print(here("agent-analysis", "utils", "post-release-alcohol.R")) 
source("/users/akhann16/code/cadre/data-analysis-plotting/Simulated-Data-Analysis/r/agent-analysis/utils/post-release-alcohol.R")


# Read RDS file ------------


#agent_log_env <- readRDS(here("agent-analysis", "rds-outs", "agent_log_env.RDS"))
print(here("agent-analysis", "rds-outs", "agent_log_env.RDS"))
## [1] "/users/akhann16/code/cadre/data-analysis-plotting/Simulated-Data-Analysis/r/agent-analysis/agent-analysis/rds-outs/agent_log_env.RDS"
agent_log_env <- readRDS("/users/akhann16/code/cadre/data-analysis-plotting/Simulated-Data-Analysis/r/agent-analysis/rds-outs/agent_log_env.RDS")



# Load data ------------

agent_dt <- agent_log_env[["agent_dt"]]
input_params <- agent_log_env[["input_params"]]


# Ticks of interest ----------

selected_ticks <- seq(1, max(agent_dt$tick), by = 10)
last_tick <- max(agent_dt$tick)


# Population subsets by release status --------
desired_tick=last_tick

all_agents <- agent_dt[tick==desired_tick,] #everyone at all times

never_released_agents <- agent_dt[tick==desired_tick & n_releases == 0] #never release

released_agents <- agent_dt[tick == desired_tick & n_releases >= 1] #released at least once


# Alcohol use state distributions in above subsets --------

all_agents_summary <- calculate_proportions(all_agents)
never_released_agents_summary <- calculate_proportions(never_released_agents)
released_agents_summary <- calculate_proportions(released_agents)


# Summarize alcohol use rates in general population --------


alcohol_summaries <- 
  lapply(selected_ticks, function(x) {
    agents_at_tick <- agent_dt[tick == x]
    proportions <- calculate_proportions(agents_at_tick)
    return(list("proportions" = proportions, "tick" = x))
  })

alcohol_proportions_df <- 
  do.call(rbind, lapply(alcohol_summaries, function(x) {
    df <- x$proportions
    df$Tick <- x$tick
    return(df)
  }
  )
  )

head(alcohol_summaries)
## [[1]]
## [[1]]$proportions
##       Category Proportion
## 1 Non-Drinking 0.34526547
## 2        Cat I 0.55944406
## 3       Cat II 0.04739526
## 4      Cat III 0.04789521
## 
## [[1]]$tick
## [1] 1
## 
## 
## [[2]]
## [[2]]$proportions
##       Category Proportion
## 1 Non-Drinking     0.3453
## 2        Cat I     0.5596
## 3       Cat II     0.0474
## 4      Cat III     0.0477
## 
## [[2]]$tick
## [1] 11
## 
## 
## [[3]]
## [[3]]$proportions
##       Category Proportion
## 1 Non-Drinking     0.3455
## 2        Cat I     0.5595
## 3       Cat II     0.0474
## 4      Cat III     0.0476
## 
## [[3]]$tick
## [1] 21
## 
## 
## [[4]]
## [[4]]$proportions
##       Category Proportion
## 1 Non-Drinking     0.3456
## 2        Cat I     0.5594
## 3       Cat II     0.0474
## 4      Cat III     0.0476
## 
## [[4]]$tick
## [1] 31
## 
## 
## [[5]]
## [[5]]$proportions
##       Category Proportion
## 1 Non-Drinking 0.34566543
## 2        Cat I 0.55934407
## 3       Cat II 0.04739526
## 4      Cat III 0.04759524
## 
## [[5]]$tick
## [1] 41
## 
## 
## [[6]]
## [[6]]$proportions
##       Category Proportion
## 1 Non-Drinking     0.3455
## 2        Cat I     0.5597
## 3       Cat II     0.0474
## 4      Cat III     0.0474
## 
## [[6]]$tick
## [1] 51
head(alcohol_proportions_df)
##       Category Proportion Tick
## 1 Non-Drinking 0.34526547    1
## 2        Cat I 0.55944406    1
## 3       Cat II 0.04739526    1
## 4      Cat III 0.04789521    1
## 5 Non-Drinking 0.34530000   11
## 6        Cat I 0.55960000   11
tail(alcohol_summaries)
## [[1]]
## [[1]]$proportions
##       Category Proportion
## 1 Non-Drinking 0.35259422
## 2        Cat I 0.55433370
## 3       Cat II 0.04748575
## 4      Cat III 0.04558632
## 
## [[1]]$tick
## [1] 10891
## 
## 
## [[2]]
## [[2]]$proportions
##       Category Proportion
## 1 Non-Drinking 0.35249425
## 2        Cat I 0.55433370
## 3       Cat II 0.04748575
## 4      Cat III 0.04568629
## 
## [[2]]$tick
## [1] 10901
## 
## 
## [[3]]
## [[3]]$proportions
##       Category Proportion
## 1 Non-Drinking     0.3524
## 2        Cat I     0.5542
## 3       Cat II     0.0477
## 4      Cat III     0.0457
## 
## [[3]]$tick
## [1] 10911
## 
## 
## [[4]]
## [[4]]$proportions
##       Category Proportion
## 1 Non-Drinking 0.35266473
## 2        Cat I 0.55394461
## 3       Cat II 0.04769523
## 4      Cat III 0.04569543
## 
## [[4]]$tick
## [1] 10921
## 
## 
## [[5]]
## [[5]]$proportions
##       Category Proportion
## 1 Non-Drinking     0.3527
## 2        Cat I     0.5540
## 3       Cat II     0.0476
## 4      Cat III     0.0457
## 
## [[5]]$tick
## [1] 10931
## 
## 
## [[6]]
## [[6]]$proportions
##       Category Proportion
## 1 Non-Drinking     0.3529
## 2        Cat I     0.5539
## 3       Cat II     0.0476
## 4      Cat III     0.0456
## 
## [[6]]$tick
## [1] 10941
tail(alcohol_proportions_df)
##          Category Proportion  Tick
## 4375       Cat II     0.0476 10931
## 4376      Cat III     0.0457 10931
## 4377 Non-Drinking     0.3529 10941
## 4378        Cat I     0.5539 10941
## 4379       Cat II     0.0476 10941
## 4380      Cat III     0.0456 10941
# Compare alcohol use rates in short time frame after release (1 year) --------

fixed_time_after_release <- 365

# filter agents who were released at least once and within one year after release
within_fixed_time_after_release_agents <- agent_dt[last_release_tick > 0 & tick <= last_release_tick + fixed_time_after_release]

# calculate proportions for agents within one year after release
within_fixed_time_after_release_summary <- calculate_proportions(within_fixed_time_after_release_agents)

# display the summary
print("Summary for Agents Within One Year After Release")
## [1] "Summary for Agents Within One Year After Release"
print(within_fixed_time_after_release_summary)
##       Category Proportion
## 1 Non-Drinking  0.2733182
## 2        Cat I  0.3971323
## 3       Cat II  0.0376628
## 4      Cat III  0.2918867
# Sensitivity Analysis on parameter of time after release ---------

# time periods in ticks (days): 1 day, 1 week, 2 weeks, 1 month, 3 months, 6 months, 1 year
time_periods <- c(1, 7, 14, 30, 90, 180, 365)

# calculate summaries for each time period
summaries <- lapply(time_periods, function(x)
  summary_after_release(x, agent_dt = agent_dt))

# display summaries
names(summaries) <- paste(time_periods, "days after release")
summaries
## $`1 days after release`
##       Category Proportion
## 1 Non-Drinking 0.22758621
## 2        Cat I 0.33793103
## 3       Cat II 0.05517241
## 4      Cat III 0.37931034
## 
## $`7 days after release`
##       Category Proportion
## 1 Non-Drinking 0.24522901
## 2        Cat I 0.36450382
## 3       Cat II 0.03053435
## 4      Cat III 0.35973282
## 
## $`14 days after release`
##       Category Proportion
## 1 Non-Drinking 0.25376648
## 2        Cat I 0.35687382
## 3       Cat II 0.03201507
## 4      Cat III 0.35734463
## 
## $`30 days after release`
##       Category Proportion
## 1 Non-Drinking 0.25132743
## 2        Cat I 0.35840708
## 3       Cat II 0.03318584
## 4      Cat III 0.35707965
## 
## $`90 days after release`
##       Category Proportion
## 1 Non-Drinking 0.25803940
## 2        Cat I 0.36821615
## 3       Cat II 0.03355914
## 4      Cat III 0.34018531
## 
## $`180 days after release`
##       Category Proportion
## 1 Non-Drinking  0.2653661
## 2        Cat I  0.3801552
## 3       Cat II  0.0344871
## 4      Cat III  0.3199916
## 
## $`365 days after release`
##       Category Proportion
## 1 Non-Drinking  0.2733182
## 2        Cat I  0.3971323
## 3       Cat II  0.0376628
## 4      Cat III  0.2918867
# Plot heavy drinking/AUD after release ---------

labels <- c("1D", "1W", "2W", "1M", "3M", "6M", "1Y")
heavy_use_or_AUD_proportions <- sapply(summaries, function(x) sum(x[x$Category == "Cat III", "Proportion"]))
heavy_use_or_AUD_proportions
##   1 days after release   7 days after release  14 days after release 
##              0.3793103              0.3597328              0.3573446 
##  30 days after release  90 days after release 180 days after release 
##              0.3570796              0.3401853              0.3199916 
## 365 days after release 
##              0.2918867
heavy_use_AUD_df <- data.frame(Time = time_periods,
                               Labels = labels,
                               Proportion = heavy_use_or_AUD_proportions
                               )


ggplot(heavy_use_AUD_df, aes(x = Time, y = Proportion, group = 1)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = time_periods, labels = labels) +
  scale_y_continuous(breaks = seq(0, 0.5, 0.1), limits = c(0, 0.5)) +
  labs(title = "",
       x = "Time After Release",
       y = "Proportion of Agents with AUD") +
  theme_minimal()+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Compare Heavy Use/AUD in general population with the post-release group  ---------

# extract rows for heavy use/AUD only
general_heavy_AUD_df <-
  alcohol_proportions_df[alcohol_proportions_df$Category == 'Cat III', ]


# aggregate the heavy/AUD data
combined_proportions <- aggregate(Proportion ~ Tick, data = general_heavy_AUD_df, sum)
head(combined_proportions)
##   Tick Proportion
## 1    1 0.04789521
## 2   11 0.04770000
## 3   21 0.04760000
## 4   31 0.04760000
## 5   41 0.04759524
## 6   51 0.04740000
# map the 'Tick' column to the same scale as the original 'heavy_use_AUD_df' dataframe


# compute average of AUD rates for the last 10 ticks
last_combined_rate <- mean(combined_proportions$Proportion[(nrow(combined_proportions) - 9):nrow(combined_proportions)])

# Create a new dataframe with these rates for the time points
last_combined_rate_df <- data.frame(Time = max(heavy_use_AUD_df$Time):365,
                          Proportion = rep(last_combined_rate, 365 - max(heavy_use_AUD_df$Time) + 1))


general_population <- data.frame(
  Time = heavy_use_AUD_df$Time,
  Proportion = last_combined_rate_df$Proportion)


# Plotting both datasets
p <- 
  ggplot() +
  geom_line(data = heavy_use_AUD_df, aes(x = Time, y = Proportion, color = "Previously Incarcerated"), linewidth=1.5) +
  geom_line(data = general_population, aes(x = Time, y = Proportion, color = "Overall"), linewidth=1.5)+
  ylim(c(0,1))+
  scale_color_manual(values = c("Previously Incarcerated" = "blue", "Overall" = "red")) +
  theme_minimal() +
  labs(title = "",
       x = "Time After Release",
       y = "Proportion",
       color = "Population") +
  scale_x_continuous(breaks = heavy_use_AUD_df$Time, labels = heavy_use_AUD_df$Labels) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        text = element_text(size = 24, face = "bold"))

p