# Analyze incarceration data

rm(list=ls())

# Load R environment ---------

renv::activate()


# Load packages ---------

library(here)
## here() starts at /users/akhann16/code/cadre/data-analysis-plotting/Simulated-Data-Analysis/r
library(data.table)
library(yaml)
library(ggplot2)
library(DiagrammeR)


# Read RDS file ------------

network_log_env <- readRDS(here("network-log-analysis", "rds-outs", "network_log_env.RDS"))


# Load data ------------

network_dt <- network_log_env[["network_dt"]]
input_params <- network_log_env[["input_params"]]
agent_dt <- network_log_env[["agent_dt"]]


# Last tick analysis ------------

last_tick <- max(network_dt$tick)

last_tick_network_dt <- network_dt[tick==last_tick,]
last_tick_agent_dt <- agent_dt[tick == last_tick,]

head(last_tick_network_dt)
##     tick p1    p2
## 1: 10941  0    15
## 2: 10941  0 13609
## 3: 10941  0 14827
## 4: 10941  0 15236
## 5: 10941  2  3018
## 6: 10941  2  6668
head(last_tick_agent_dt)
##     tick id age  race female alc_use_status smoking_status
## 1: 10941  0  83 White      1              1         Former
## 2: 10941  2  60 White      1              1          Never
## 3: 10941  3  57 White      1              2          Never
## 4: 10941  5  48 White      0              1          Never
## 5: 10941  6  52 White      1              1          Never
## 6: 10941  7  59 White      1              1          Never
##    last_incarceration_tick last_release_tick current_incarceration_status
## 1:                      -1                -1                            0
## 2:                      -1                -1                            0
## 3:                      -1                -1                            0
## 4:                      -1                -1                            0
## 5:                      -1                -1                            0
## 6:                      -1                -1                            0
##    entry_at_tick exit_at_tick n_incarcerations n_releases n_smkg_stat_trans
## 1:             0           -1                0          0                22
## 2:             0           -1                0          0                 0
## 3:             0           -1                0          0                 0
## 4:             0           -1                0          0                 0
## 5:             0           -1                0          0                 0
## 6:             0           -1                0          0                 0
##    n_alc_use_stat_trans
## 1:                    7
## 2:                   11
## 3:                    6
## 4:                    7
## 5:                    4
## 6:                    7
# Visualize ------------

# Identify the IDs of the recently released agents
recently_released_agents <- last_tick_agent_dt$id[last_tick_agent_dt$last_release_tick > (last_tick - 365)]

# Get the network data for the recently released agents
network_recently_released <- last_tick_network_dt[(last_tick_network_dt$p1 %in% recently_released_agents) | 
                                                    (last_tick_network_dt$p2 %in% recently_released_agents),]

# Get the first-degree network (neighbors) for each agent
first_degree_neighbors <- unique(c(network_recently_released$p1, network_recently_released$p2))

# Get agent data for the first-degree neighbors
first_degree_neighbors_agent_data <- last_tick_agent_dt[last_tick_agent_dt$id %in% first_degree_neighbors,]

# Create an edge data frame from network data
edf <- data.frame(from = network_recently_released$p1, 
                  to = network_recently_released$p2)

# Create a node data frame with  agent data
ndf <- data.frame(id = first_degree_neighbors_agent_data$id, 
                  smoking_status = first_degree_neighbors_agent_data$smoking_status,
                  alc_use_status = first_degree_neighbors_agent_data$alc_use_status,
                  recently_released = ifelse(first_degree_neighbors_agent_data$id %in% recently_released_agents, "yes", "no"))

# Create a graph with DiagrammeR
graph <- create_graph(nodes_df = ndf, 
                      edges_df = edf,
                      directed = FALSE)
## Warning in system("timedatectl", intern = TRUE): running command 'timedatectl'
## had status 1
# Customize the node fillcolor based on smoking_status, alc_use_status and recently_released
graph <- set_node_attrs(graph, "fillcolor", ifelse(ndf$smoking_status == "Current", "red", 
                                                   ifelse(ndf$alc_use_status == 3, "blue", 
                                                          ifelse(ndf$recently_released == "yes", "black", "white"))))

# Customize the node color based on smoking_status, alc_use_status and recently_released
graph <- set_node_attrs(graph, "color", ifelse(ndf$smoking_status == "Current", "red", 
                                               ifelse(ndf$alc_use_status == 3, "blue", 
                                                      ifelse(ndf$recently_released == "yes", "black", "gray"))))

# Customize the node shape based on recently_released
graph <- set_node_attrs(graph, "shape", ifelse(ndf$recently_released == "yes", "square", "circle"))

# Remove labels from nodes to make them stand out more
graph <- set_node_attrs(graph, "label", "")

# Customize the edge color and line type
graph <- set_edge_attrs(graph, "color", "black")  # Change the edge color to gray
graph <- set_edge_attrs(graph, "style", "solid")  # Change the edge line type to dashed

# Customize the edge thickness
graph <- set_edge_attrs(graph, "penwidth", 2)  # Increase the edge thickness

# Customize the outline color of all nodes
graph <- set_node_attrs(graph, "color", "darkgray") 

# Render the graph
render_graph(graph)
# Create a legend
legend <- rbind(data.frame(group = "Recently Released", shape = "square", color = "black"),
                data.frame(group = "Currently Smoking", shape = "circle", color = "red"),
                data.frame(group = "AUD", shape = "circle", color = "blue"))


# Prevalence of Current Smoking and AUD among network members:

# Number of agents with Current Smoking status
num_current_smoking <- sum(first_degree_neighbors_agent_data$smoking_status == "Current")

# Number of agents with AUD
num_aud <- sum(first_degree_neighbors_agent_data$alc_use_status == 3)

# Total number of agents in the network
total_agents <- nrow(first_degree_neighbors_agent_data)

# Calculate prevalence
prevalence_smoking <- num_current_smoking / total_agents
prevalence_aud <- num_aud / total_agents

# Print the results
cat("Prevalence of Current Smoking in networks of recently released agents: ", prevalence_smoking, "\n")
## Prevalence of Current Smoking in networks of recently released agents:  0.2727273
cat("Prevalence of AUD in networks of recently released agents: ", prevalence_aud, "\n")
## Prevalence of AUD in networks of recently released agents:  0.1818182
# Prevalence of Cyrrent Smoking and AUD among this set of egos ------------

# Define selected_agents_df, which contains the data for the selected agents
recently_released_agents_df <- last_tick_agent_dt[last_tick_agent_dt$id %in% recently_released_agents,]

# Number of selected agents who are current smokers
num_ego_current_smoking <- sum(recently_released_agents_df$smoking_status == "Current")

# Number of selected agents with AUD
num_ego_aud <- sum(recently_released_agents_df$alc_use_status == 3)

# Total number of selected agents
total_ego_agents <- length(recently_released_agents)

# Calculate prevalence
prevalence_ego_smoking <- num_ego_current_smoking / total_ego_agents
prevalence_ego_aud <- num_ego_aud / total_ego_agents

# Print the results
cat("Prevalence of Current Smoking among recently released agents: ", prevalence_ego_smoking, "\n")
## Prevalence of Current Smoking among recently released agents:  0.5483871
cat("Prevalence of AUD among recently released agents: ", prevalence_ego_aud, "\n")
## Prevalence of AUD among recently released agents:  0.2580645